summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author kou <kou@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2019-04-14 21:01:51 +0000
committer kou <kou@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2019-04-14 21:01:51 +0000
commit e3b6c7c7ebca1b051dbaa6f33494e92f5638fcc9 (patch)
tree f98e60ecbeb320a9e9d8bdacfa3f9231cd55ddda
parent fb96811d15f83c6b692e8e00d458eef32032af6a (diff)
Import CSV 3.0.8
This includes performance improvements and backward incompatibility fixes.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67560 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- NEWS 2
-rw-r--r-- lib/csv.rb 129
-rw-r--r-- lib/csv/csv.gemspec 1
-rw-r--r-- lib/csv/delete_suffix.rb 18
-rw-r--r-- lib/csv/parser.rb 502
-rw-r--r-- lib/csv/version.rb 2
-rw-r--r-- lib/csv/writer.rb 3
-rw-r--r-- test/csv/helper.rb 13
-rw-r--r-- test/csv/interface/test_delegation.rb 47
-rw-r--r-- test/csv/interface/test_read.rb 277
-rw-r--r-- test/csv/interface/test_read_write.rb 51
-rw-r--r-- test/csv/interface/test_write.rb 174
-rw-r--r-- test/csv/parse/test_general.rb 4
-rw-r--r-- test/csv/parse/test_invalid.rb 36
-rw-r--r-- test/csv/parse/test_liberal_parsing.rb 75
-rw-r--r-- test/csv/parse/test_quote_char_nil.rb 93
-rw-r--r-- test/csv/parse/test_row_separator.rb 16
-rw-r--r-- test/csv/parse/test_skip_lines.rb 105
-rw-r--r-- test/csv/parse/test_strip.rb 48
-rwxr-xr-x test/csv/test_encodings.rb 11
-rwxr-xr-x test/csv/test_features.rb 74
-rwxr-xr-x test/csv/test_interface.rb 450
-rw-r--r-- test/csv/write/test_converters.rb 53
23 files changed, 1534 insertions, 650 deletions
diff --git a/NEWS b/NEWS
index 75fa91798d6..0d05ce6f619 100644
--- a/NEWS
+++ b/NEWS
@@ -62,7 +62,7 @@ Regexp/String::
CSV::
- * Upgrade to 3.0.4.
+ * Upgrade to 3.0.8.
See https://github.com/ruby/csv/blob/master/NEWS.md.
Date::
diff --git a/lib/csv.rb b/lib/csv.rb
index 1a173c6d683..1239554ad62 100644
--- a/lib/csv.rb
+++ b/lib/csv.rb
@@ -504,9 +504,9 @@ class CSV
# <tt>encoding: "UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file
# but transcode it to UTF-8 before CSV parses it.
#
- def self.foreach(path, **options, &block)
- return to_enum(__method__, path, options) unless block_given?
- open(path, options) do |csv|
+ def self.foreach(path, mode="r", **options, &block)
+ return to_enum(__method__, path, mode, options) unless block_given?
+ open(path, mode, options) do |csv|
csv.each(&block)
end
end
@@ -885,6 +885,10 @@ class CSV
# blank string field is replaced by
# the set object.
# <b><tt>:quote_empty</tt></b>:: TODO
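+ # <b><tt>:write_converters</tt></b>:: Converters applied to the fields of each row when writing.
+ # <b><tt>:write_nil_value</tt></b>:: When set an object, any +nil+ field is replaced by the set object when writing.
+ # <b><tt>:write_empty_value</tt></b>:: When set an object, any blank string field is replaced by the set object when writing.
+ # <b><tt>:strip</tt></b>:: When +true+, whitespace around values is stripped while parsing; a single character String strips that character instead.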
#
# See CSV::DEFAULT_OPTIONS for the default settings.
#
@@ -911,7 +915,11 @@ class CSV
encoding: nil,
nil_value: nil,
empty_value: "",
- quote_empty: true)
+ quote_empty: true,
+ write_converters: nil,
+ write_nil_value: nil,
+ write_empty_value: "",
+ strip: false)
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
# create the IO object we will read from
@@ -922,8 +930,13 @@ class CSV
nil_value: nil_value,
empty_value: empty_value,
}
+ @write_fields_converter_options = {
+ nil_value: write_nil_value,
+ empty_value: write_empty_value,
+ }
@initial_converters = converters
@initial_header_converters = header_converters
+ @initial_write_converters = write_converters
@parser_options = {
column_separator: col_sep,
@@ -939,6 +952,7 @@ class CSV
encoding: @encoding,
nil_value: nil_value,
empty_value: empty_value,
+ strip: strip,
}
@parser = nil
@@ -998,7 +1012,7 @@ class CSV
# as is.
#
def converters
- fields_converter.map do |converter|
+ parser_fields_converter.map do |converter|
name = Converters.rassoc(converter)
name ? name.first : converter
end
@@ -1098,12 +1112,58 @@ class CSV
### IO and StringIO Delegation ###
extend Forwardable
- def_delegators :@io, :binmode, :binmode?, :close, :close_read, :close_write,
- :closed?, :eof, :eof?, :external_encoding, :fcntl,
- :fileno, :flock, :flush, :fsync, :internal_encoding,
- :ioctl, :isatty, :path, :pid, :pos, :pos=, :reopen,
- :seek, :stat, :string, :sync, :sync=, :tell, :to_i,
- :to_io, :truncate, :tty?
+ def_delegators :@io, :binmode, :close, :close_read, :close_write,
+ :closed?, :external_encoding, :fcntl,
+ :fileno, :flush, :fsync, :internal_encoding,
+ :isatty, :pid, :pos, :pos=, :reopen,
+ :seek, :string, :sync, :sync=, :tell,
+ :truncate, :tty?
+
+ def binmode?
+ if @io.respond_to?(:binmode?)
+ @io.binmode?
+ else
+ false
+ end
+ end
+
+ def flock(*args)
+ raise NotImplementedError unless @io.respond_to?(:flock)
+ @io.flock(*args)
+ end
+
+ def ioctl(*args)
+ raise NotImplementedError unless @io.respond_to?(:ioctl)
+ @io.ioctl(*args)
+ end
+
+ def path
+ @io.path if @io.respond_to?(:path)
+ end
+
+ def stat(*args)
+ raise NotImplementedError unless @io.respond_to?(:stat)
+ @io.stat(*args)
+ end
+
+ def to_i
+ raise NotImplementedError unless @io.respond_to?(:to_i)
+ @io.to_i
+ end
+
+ def to_io
+ @io.respond_to?(:to_io) ? @io.to_io : @io
+ end
+
+ def eof?
+ begin
+ parser_enumerator.peek
+ false
+ rescue StopIteration
+ true
+ end
+ end
+ alias_method :eof, :eof?
# Rewinds the underlying IO object and resets CSV's lineno() counter.
def rewind
@@ -1145,7 +1205,7 @@ class CSV
# converted field or the field itself.
#
def convert(name = nil, &converter)
- fields_converter.add_converter(name, &converter)
+ parser_fields_converter.add_converter(name, &converter)
end
#
@@ -1173,7 +1233,7 @@ class CSV
# The data source must be open for reading.
#
def each(&block)
- parser.parse(&block)
+ parser_enumerator.each(&block)
end
#
@@ -1204,9 +1264,8 @@ class CSV
# The data source must be open for reading.
#
def shift
- @parser_enumerator ||= parser.parse
begin
- @parser_enumerator.next
+ parser_enumerator.next
rescue StopIteration
nil
end
@@ -1299,7 +1358,7 @@ class CSV
if headers
header_fields_converter.convert(fields, nil, 0)
else
- fields_converter.convert(fields, @headers, lineno)
+ parser_fields_converter.convert(fields, @headers, lineno)
end
end
@@ -1316,20 +1375,16 @@ class CSV
end
end
- def fields_converter
- @fields_converter ||= build_fields_converter
+ def parser_fields_converter
+ @parser_fields_converter ||= build_parser_fields_converter
end
- def build_fields_converter
+ def build_parser_fields_converter
specific_options = {
builtin_converters: Converters,
}
options = @base_fields_converter_options.merge(specific_options)
- fields_converter = FieldsConverter.new(options)
- normalize_converters(@initial_converters).each do |name, converter|
- fields_converter.add_converter(name, &converter)
- end
- fields_converter
+ build_fields_converter(@initial_converters, options)
end
def header_fields_converter
@@ -1342,8 +1397,21 @@ class CSV
accept_nil: true,
}
options = @base_fields_converter_options.merge(specific_options)
+ build_fields_converter(@initial_header_converters, options)
+ end
+
+ def writer_fields_converter
+ @writer_fields_converter ||= build_writer_fields_converter
+ end
+
+ def build_writer_fields_converter
+ build_fields_converter(@initial_write_converters,
+ @write_fields_converter_options)
+ end
+
+ def build_fields_converter(initial_converters, options)
fields_converter = FieldsConverter.new(options)
- normalize_converters(@initial_header_converters).each do |name, converter|
+ normalize_converters(initial_converters).each do |name, converter|
fields_converter.add_converter(name, &converter)
end
fields_converter
@@ -1354,8 +1422,12 @@ class CSV
end
def parser_options
- @parser_options.merge(fields_converter: fields_converter,
- header_fields_converter: header_fields_converter)
+ @parser_options.merge(header_fields_converter: header_fields_converter,
+ fields_converter: parser_fields_converter)
+ end
+
+ def parser_enumerator
+ @parser_enumerator ||= parser.parse
end
def writer
@@ -1363,7 +1435,8 @@ class CSV
end
def writer_options
- @writer_options.merge(header_fields_converter: header_fields_converter)
+ @writer_options.merge(header_fields_converter: header_fields_converter,
+ fields_converter: writer_fields_converter)
end
end
diff --git a/lib/csv/csv.gemspec b/lib/csv/csv.gemspec
index f57d9efb7d1..98110bc13c4 100644
--- a/lib/csv/csv.gemspec
+++ b/lib/csv/csv.gemspec
@@ -25,6 +25,7 @@ Gem::Specification.new do |spec|
"lib/csv.rb",
"lib/csv/core_ext/array.rb",
"lib/csv/core_ext/string.rb",
+ "lib/csv/delete_suffix.rb",
"lib/csv/fields_converter.rb",
"lib/csv/match_p.rb",
"lib/csv/parser.rb",
diff --git a/lib/csv/delete_suffix.rb b/lib/csv/delete_suffix.rb
new file mode 100644
index 00000000000..e0b40c7aab4
--- /dev/null
+++ b/lib/csv/delete_suffix.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+# This provides String#delete_suffix for Ruby 2.4.
+unless String.method_defined?(:delete_suffix)
+ class CSV
+ module DeleteSuffix
+ refine String do
+ def delete_suffix(suffix)
+ if end_with?(suffix)
+ self[0..(-(suffix.size + 1))]
+ else
+ self
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index e6cbc074618..85252203e42 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -2,10 +2,12 @@
require "strscan"
+require_relative "delete_suffix"
require_relative "match_p"
require_relative "row"
require_relative "table"
+using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
using CSV::MatchP if CSV.const_defined?(:MatchP)
class CSV
@@ -21,6 +23,15 @@ class CSV
@keeps = []
end
+ def each_line(row_separator)
+ position = pos
+ rest.each_line(row_separator) do |line|
+ position += line.bytesize
+ self.pos = position
+ yield(line)
+ end
+ end
+
def keep_start
@keeps.push(pos)
end
@@ -49,6 +60,50 @@ class CSV
read_chunk
end
+ def each_line(row_separator)
+ buffer = nil
+ input = @scanner.rest
+ position = @scanner.pos
+ offset = 0
+ n_row_separator_chars = row_separator.size
+ while true
+ input.each_line(row_separator) do |line|
+ @scanner.pos += line.bytesize
+ if buffer
+ if n_row_separator_chars == 2 and
+ buffer.end_with?(row_separator[0]) and
+ line.start_with?(row_separator[1])
+ buffer << line[0]
+ line = line[1..-1]
+ position += buffer.bytesize + offset
+ @scanner.pos = position
+ offset = 0
+ yield(buffer)
+ buffer = nil
+ next if line.empty?
+ else
+ buffer << line
+ line = buffer
+ buffer = nil
+ end
+ end
+ if line.end_with?(row_separator)
+ position += line.bytesize + offset
+ @scanner.pos = position
+ offset = 0
+ yield(line)
+ else
+ buffer = line
+ end
+ end
+ break unless read_chunk
+ input = @scanner.rest
+ position = @scanner.pos
+ offset = -buffer.bytesize if buffer
+ end
+ yield(buffer) if buffer
+ end
+
def scan(pattern)
value = @scanner.scan(pattern)
return value if @last_scanner
@@ -94,7 +149,7 @@ class CSV
start, buffer = @keeps.pop
if buffer
string = @scanner.string
- keep = string[start, string.size - start]
+ keep = string.byteslice(start, string.bytesize - start)
if keep and not keep.empty?
@inputs.unshift(StringIO.new(keep))
@last_scanner = false
@@ -103,6 +158,7 @@ class CSV
else
@scanner.pos = start
end
+ read_chunk if @scanner.eos?
end
def keep_drop
@@ -121,7 +177,7 @@ class CSV
keep = @keeps.last
keep_start = keep[0]
string = @scanner.string
- keep_data = string[keep_start, @scanner.pos - keep_start]
+ keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
if keep_data
keep_buffer = keep[1]
if keep_buffer
@@ -170,7 +226,6 @@ class CSV
@input = input
@options = options
@samples = []
- @parsed = false
prepare
end
@@ -230,9 +285,7 @@ class CSV
def parse(&block)
return to_enum(__method__) unless block_given?
- return if @parsed
-
- if @return_headers and @headers
+ if @return_headers and @headers and @raw_headers
headers = Row.new(@headers, @raw_headers, true)
if @unconverted_fields
headers = add_unconverted_fields(headers, [])
@@ -240,58 +293,25 @@ class CSV
yield headers
end
- row = []
begin
- @scanner = build_scanner
- skip_needless_lines
- start_row
- while true
- @quoted_column_value = false
- @unquoted_column_value = false
- value = parse_column_value
- if value and @field_size_limit and value.size >= @field_size_limit
- raise MalformedCSVError.new("Field size exceeded", @lineno + 1)
- end
- if parse_column_end
- row << value
- elsif parse_row_end
- if row.empty? and value.nil?
- emit_row([], &block) unless @skip_blanks
- else
- row << value
- emit_row(row, &block)
- row = []
- end
- skip_needless_lines
- start_row
- elsif @scanner.eos?
- break if row.empty? and value.nil?
- row << value
- emit_row(row, &block)
- break
- else
- if @quoted_column_value
- message = "Do not allow except col_sep_split_separator " +
- "after quoted fields"
- raise MalformedCSVError.new(message, @lineno + 1)
- elsif @unquoted_column_value and @scanner.scan(@cr_or_lf)
- message = "Unquoted fields do not allow \\r or \\n"
- raise MalformedCSVError.new(message, @lineno + 1)
- elsif @scanner.rest.start_with?(@quote_character)
- message = "Illegal quoting"
- raise MalformedCSVError.new(message, @lineno + 1)
- else
- raise MalformedCSVError.new("TODO: Meaningful message",
- @lineno + 1)
- end
- end
+ @scanner ||= build_scanner
+ if quote_character.nil?
+ parse_no_quote(&block)
+ elsif @need_robust_parsing
+ parse_quotable_robust(&block)
+ else
+ parse_quotable_loose(&block)
end
rescue InvalidEncoding
+ if @scanner
+ ignore_broken_line
+ lineno = @lineno
+ else
+ lineno = @lineno + 1
+ end
message = "Invalid byte sequence in #{@encoding}"
- raise MalformedCSVError.new(message, @lineno + 1)
+ raise MalformedCSVError.new(message, lineno)
end
-
- @parsed = true
end
def use_headers?
@@ -301,13 +321,20 @@ class CSV
private
def prepare
prepare_variable
- prepare_regexp
+ prepare_quote_character
+ prepare_backslash
+ prepare_skip_lines
+ prepare_strip
+ prepare_separators
+ prepare_quoted
+ prepare_unquoted
prepare_line
prepare_header
prepare_parser
end
def prepare_variable
+ @need_robust_parsing = false
@encoding = @options[:encoding]
liberal_parsing = @options[:liberal_parsing]
if liberal_parsing
@@ -315,11 +342,15 @@ class CSV
if liberal_parsing.is_a?(Hash)
@double_quote_outside_quote =
liberal_parsing[:double_quote_outside_quote]
+ @backslash_quote = liberal_parsing[:backslash_quote]
else
@double_quote_outside_quote = false
+ @backslash_quote = false
end
+ @need_robust_parsing = true
else
@liberal_parsing = false
+ @backslash_quote = false
end
@unconverted_fields = @options[:unconverted_fields]
@field_size_limit = @options[:field_size_limit]
@@ -328,20 +359,39 @@ class CSV
@header_fields_converter = @options[:header_fields_converter]
end
- def prepare_regexp
- @column_separator = @options[:column_separator].to_s.encode(@encoding)
- @row_separator =
- resolve_row_separator(@options[:row_separator]).encode(@encoding)
- @quote_character = @options[:quote_character].to_s.encode(@encoding)
- if @quote_character.length != 1
- raise ArgumentError, ":quote_char has to be a single character String"
+ def prepare_quote_character
+ @quote_character = @options[:quote_character]
+ if @quote_character.nil?
+ @escaped_quote_character = nil
+ @escaped_quote = nil
+ else
+ @quote_character = @quote_character.to_s.encode(@encoding)
+ if @quote_character.length != 1
+ message = ":quote_char has to be nil or a single character String"
+ raise ArgumentError, message
+ end
+ @double_quote_character = @quote_character * 2
+ @escaped_quote_character = Regexp.escape(@quote_character)
+ @escaped_quote = Regexp.new(@escaped_quote_character)
end
+ end
- escaped_column_separator = Regexp.escape(@column_separator)
- escaped_first_column_separator = Regexp.escape(@column_separator[0])
- escaped_row_separator = Regexp.escape(@row_separator)
- escaped_quote_character = Regexp.escape(@quote_character)
+ def prepare_backslash
+ return unless @backslash_quote
+ @backslash_character = "\\".encode(@encoding)
+
+ @escaped_backslash_character = Regexp.escape(@backslash_character)
+ @escaped_backslash = Regexp.new(@escaped_backslash_character)
+ if @quote_character.nil?
+ @backslash_quote_character = nil
+ else
+ @backslash_quote_character =
+ @backslash_character + @escaped_quote_character
+ end
+ end
+
+ def prepare_skip_lines
skip_lines = @options[:skip_lines]
case skip_lines
when String
@@ -356,18 +406,71 @@ class CSV
end
@skip_lines = skip_lines
end
+ end
+
+ def prepare_strip
+ @strip = @options[:strip]
+ @escaped_strip = nil
+ @strip_value = nil
+ if @strip.is_a?(String)
+ case @strip.length
+ when 0
+ raise ArgumentError, ":strip must not be an empty String"
+ when 1
+ # ok
+ else
+ raise ArgumentError, ":strip doesn't support 2 or more characters yet"
+ end
+ @strip = @strip.encode(@encoding)
+ @escaped_strip = Regexp.escape(@strip)
+ if @quote_character
+ @strip_value = Regexp.new(@escaped_strip +
+ "+".encode(@encoding))
+ end
+ @need_robust_parsing = true
+ elsif @strip
+ strip_values = " \t\r\n\f\v"
+ @escaped_strip = strip_values.encode(@encoding)
+ if @quote_character
+ @strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
+ end
+ @need_robust_parsing = true
+ end
+ end
- @column_end = Regexp.new(escaped_column_separator)
+ begin
+ StringScanner.new("x").scan("x")
+ rescue TypeError
+ @@string_scanner_scan_accept_string = false
+ else
+ @@string_scanner_scan_accept_string = true
+ end
+
+ def prepare_separators
+ @column_separator = @options[:column_separator].to_s.encode(@encoding)
+ @row_separator =
+ resolve_row_separator(@options[:row_separator]).encode(@encoding)
+
+ @escaped_column_separator = Regexp.escape(@column_separator)
+ @escaped_first_column_separator = Regexp.escape(@column_separator[0])
if @column_separator.size > 1
+ @column_end = Regexp.new(@escaped_column_separator)
@column_ends = @column_separator.each_char.collect do |char|
Regexp.new(Regexp.escape(char))
end
- @first_column_separators = Regexp.new(escaped_first_column_separator +
+ @first_column_separators = Regexp.new(@escaped_first_column_separator +
"+".encode(@encoding))
else
+ if @@string_scanner_scan_accept_string
+ @column_end = @column_separator
+ else
+ @column_end = Regexp.new(@escaped_column_separator)
+ end
@column_ends = nil
@first_column_separators = nil
end
+
+ escaped_row_separator = Regexp.escape(@row_separator)
@row_end = Regexp.new(escaped_row_separator)
if @row_separator.size > 1
@row_ends = @row_separator.each_char.collect do |char|
@@ -376,25 +479,56 @@ class CSV
else
@row_ends = nil
end
- @quotes = Regexp.new(escaped_quote_character +
- "+".encode(@encoding))
- @quoted_value = Regexp.new("[^".encode(@encoding) +
- escaped_quote_character +
- "]+".encode(@encoding))
- if @liberal_parsing
- @unquoted_value = Regexp.new("[^".encode(@encoding) +
- escaped_first_column_separator +
- "\r\n]+".encode(@encoding))
- else
- @unquoted_value = Regexp.new("[^".encode(@encoding) +
- escaped_quote_character +
- escaped_first_column_separator +
- "\r\n]+".encode(@encoding))
- end
+
+ @cr = "\r".encode(@encoding)
+ @lf = "\n".encode(@encoding)
@cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
@not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
end
+ def prepare_quoted
+ if @quote_character
+ @quotes = Regexp.new(@escaped_quote_character +
+ "+".encode(@encoding))
+ no_quoted_values = @escaped_quote_character.dup
+ if @backslash_quote
+ no_quoted_values << @escaped_backslash_character
+ end
+ @quoted_value = Regexp.new("[^".encode(@encoding) +
+ no_quoted_values +
+ "]+".encode(@encoding))
+ end
+ if @escaped_strip
+ @split_column_separator = Regexp.new(@escaped_strip +
+ "*".encode(@encoding) +
+ @escaped_column_separator +
+ @escaped_strip +
+ "*".encode(@encoding))
+ else
+ if @column_separator == " ".encode(@encoding)
+ @split_column_separator = Regexp.new(@escaped_column_separator)
+ else
+ @split_column_separator = @column_separator
+ end
+ end
+ end
+
+ def prepare_unquoted
+ return if @quote_character.nil?
+
+ no_unquoted_values = "\r\n".encode(@encoding)
+ no_unquoted_values << @escaped_first_column_separator
+ unless @liberal_parsing
+ no_unquoted_values << @escaped_quote_character
+ end
+ if @escaped_strip
+ no_unquoted_values << @escaped_strip
+ end
+ @unquoted_value = Regexp.new("[^".encode(@encoding) +
+ no_unquoted_values +
+ "]+".encode(@encoding))
+ end
+
def resolve_row_separator(separator)
if separator == :auto
cr = "\r".encode(@encoding)
@@ -514,6 +648,8 @@ class CSV
end
def may_quoted?
+ return false if @quote_character.nil?
+
if @input.is_a?(StringIO)
sample = @input.string
else
@@ -534,6 +670,10 @@ class CSV
@io.gets(*args)
end
+ def each_line(*args, &block)
+ @io.each_line(*args, &block)
+ end
+
def eof?
@io.eof?
end
@@ -548,7 +688,10 @@ class CSV
else
inputs << @input
end
- InputsScanner.new(inputs, @encoding, chunk_size: 1)
+ chunk_size = ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"
+ InputsScanner.new(inputs,
+ @encoding,
+ chunk_size: Integer(chunk_size, 10))
end
else
def build_scanner
@@ -560,8 +703,13 @@ class CSV
end
if string
unless string.valid_encoding?
- message = "Invalid byte sequence in #{@encoding}"
- raise MalformedCSVError.new(message, @lineno + 1)
+ index = string.lines(@row_separator).index do |line|
+ !line.valid_encoding?
+ end
+ if index
+ message = "Invalid byte sequence in #{@encoding}"
+ raise MalformedCSVError.new(message, @lineno + index + 1)
+ end
end
Scanner.new(string)
else
@@ -582,6 +730,7 @@ class CSV
line = @scanner.scan_all(@not_line_end) || "".encode(@encoding)
line << @row_separator if parse_row_end
if skip_line?(line)
+ @lineno += 1
@scanner.keep_drop
else
@scanner.keep_back
@@ -601,6 +750,147 @@ class CSV
end
end
+ def parse_no_quote(&block)
+ @scanner.each_line(@row_separator) do |line|
+ next if @skip_lines and skip_line?(line)
+ original_line = line
+ line = line.delete_suffix(@row_separator)
+
+ if line.empty?
+ next if @skip_blanks
+ row = []
+ else
+ line = strip_value(line)
+ row = line.split(@split_column_separator, -1)
+ n_columns = row.size
+ i = 0
+ while i < n_columns
+ row[i] = nil if row[i].empty?
+ i += 1
+ end
+ end
+ @last_line = original_line
+ emit_row(row, &block)
+ end
+ end
+
+ def parse_quotable_loose(&block)
+ @scanner.keep_start
+ @scanner.each_line(@row_separator) do |line|
+ if @skip_lines and skip_line?(line)
+ @scanner.keep_drop
+ @scanner.keep_start
+ next
+ end
+ original_line = line
+ line = line.delete_suffix(@row_separator)
+
+ if line.empty?
+ if @skip_blanks
+ @scanner.keep_drop
+ @scanner.keep_start
+ next
+ end
+ row = []
+ elsif line.include?(@cr) or line.include?(@lf)
+ @scanner.keep_back
+ @need_robust_parsing = true
+ return parse_quotable_robust(&block)
+ else
+ row = line.split(@split_column_separator, -1)
+ n_columns = row.size
+ i = 0
+ while i < n_columns
+ column = row[i]
+ if column.empty?
+ row[i] = nil
+ else
+ n_quotes = column.count(@quote_character)
+ if n_quotes.zero?
+ # no quote
+ elsif n_quotes == 2 and
+ column.start_with?(@quote_character) and
+ column.end_with?(@quote_character)
+ row[i] = column[1..-2]
+ else
+ @scanner.keep_back
+ @need_robust_parsing = true
+ return parse_quotable_robust(&block)
+ end
+ end
+ i += 1
+ end
+ end
+ @scanner.keep_drop
+ @scanner.keep_start
+ @last_line = original_line
+ emit_row(row, &block)
+ end
+ @scanner.keep_drop
+ end
+
+ def parse_quotable_robust(&block)
+ row = []
+ skip_needless_lines
+ start_row
+ while true
+ @quoted_column_value = false
+ @unquoted_column_value = false
+ @scanner.scan_all(@strip_value) if @strip_value
+ value = parse_column_value
+ if value
+ @scanner.scan_all(@strip_value) if @strip_value
+ if @field_size_limit and value.size >= @field_size_limit
+ ignore_broken_line
+ raise MalformedCSVError.new("Field size exceeded", @lineno)
+ end
+ end
+ if parse_column_end
+ row << value
+ elsif parse_row_end
+ if row.empty? and value.nil?
+ emit_row([], &block) unless @skip_blanks
+ else
+ row << value
+ emit_row(row, &block)
+ row = []
+ end
+ skip_needless_lines
+ start_row
+ elsif @scanner.eos?
+ break if row.empty? and value.nil?
+ row << value
+ emit_row(row, &block)
+ break
+ else
+ if @quoted_column_value
+ ignore_broken_line
+ message = "Any value after quoted field isn't allowed"
+ raise MalformedCSVError.new(message, @lineno)
+ elsif @unquoted_column_value and
+ (new_line = @scanner.scan(@cr_or_lf))
+ ignore_broken_line
+ message = "Unquoted fields do not allow new line " +
+ "<#{new_line.inspect}>"
+ raise MalformedCSVError.new(message, @lineno)
+ elsif @scanner.rest.start_with?(@quote_character)
+ ignore_broken_line
+ message = "Illegal quoting"
+ raise MalformedCSVError.new(message, @lineno)
+ elsif (new_line = @scanner.scan(@cr_or_lf))
+ ignore_broken_line
+ message = "New line must be <#{@row_separator.inspect}> " +
+ "not <#{new_line.inspect}>"
+ raise MalformedCSVError.new(message, @lineno)
+ else
+ ignore_broken_line
+ raise MalformedCSVError.new("TODO: Meaningful message",
+ @lineno)
+ end
+ end
+ end
+ end
+
def parse_column_value
if @liberal_parsing
quoted_value = parse_quoted_column_value
@@ -651,6 +941,7 @@ class CSV
value << sub_value
end
end
+ value.gsub!(@backslash_quote_character, @quote_character) if @backslash_quote
value
end
@@ -667,10 +958,22 @@ class CSV
while true
quoted_value = @scanner.scan_all(@quoted_value)
value << quoted_value if quoted_value
+ if @backslash_quote
+ if @scanner.scan(@escaped_backslash)
+ if @scanner.scan(@escaped_quote)
+ value << @quote_character
+ else
+ value << @backslash_character
+ end
+ next
+ end
+ end
+
quotes = @scanner.scan_all(@quotes)
unless quotes
+ ignore_broken_line
message = "Unclosed quoted field"
- raise MalformedCSVError.new(message, @lineno + 1)
+ raise MalformedCSVError.new(message, @lineno)
end
n_quotes = quotes.size
if n_quotes == 1
@@ -713,6 +1016,33 @@ class CSV
end
end
+ def strip_value(value)
+ return value unless @strip
+ return nil if value.nil?
+
+ case @strip
+ when String
+ size = value.size
+ while value.start_with?(@strip)
+ size -= 1
+ value = value[1, size]
+ end
+ while value.end_with?(@strip)
+ size -= 1
+ value = value[0, size]
+ end
+ else
+ value.strip!
+ end
+ value
+ end
+
+ def ignore_broken_line
+ @scanner.scan_all(@not_line_end)
+ @scanner.scan_all(@cr_or_lf)
+ @lineno += 1
+ end
+
def start_row
if @last_line
@last_line = nil
diff --git a/lib/csv/version.rb b/lib/csv/version.rb
index 0b4b7d19668..b2b0ad743a9 100644
--- a/lib/csv/version.rb
+++ b/lib/csv/version.rb
@@ -2,5 +2,5 @@
class CSV
# The version of the installed library.
- VERSION = "3.0.4"
+ VERSION = "3.0.8"
end
diff --git a/lib/csv/writer.rb b/lib/csv/writer.rb
index 36db9d4014f..8e0aab32ffe 100644
--- a/lib/csv/writer.rb
+++ b/lib/csv/writer.rb
@@ -18,6 +18,7 @@ class CSV
if @options[:write_headers] and @headers
self << @headers
end
+ @fields_converter = @options[:fields_converter]
end
def <<(row)
@@ -31,6 +32,8 @@ class CSV
@headers ||= row if @use_headers
@lineno += 1
+ row = @fields_converter.convert(row, nil, lineno) if @fields_converter
+
converted_row = row.collect do |field|
quote(field)
end
diff --git a/test/csv/helper.rb b/test/csv/helper.rb
index 4f7b00244b4..2542cc9c979 100644
--- a/test/csv/helper.rb
+++ b/test/csv/helper.rb
@@ -1,5 +1,18 @@
+require "tempfile"
require "test/unit"
require "csv"
require_relative "../lib/with_different_ofs.rb"
+
+module Helper
+ def with_chunk_size(chunk_size)
+ chunk_size_keep = ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"]
+ begin
+ ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] = chunk_size
+ yield
+ ensure
+ ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] = chunk_size_keep
+ end
+ end
+end
diff --git a/test/csv/interface/test_delegation.rb b/test/csv/interface/test_delegation.rb
new file mode 100644
index 00000000000..349257633bd
--- /dev/null
+++ b/test/csv/interface/test_delegation.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVInterfaceDelegation < Test::Unit::TestCase
+ class TestStringIO < self
+ def setup
+ @csv = CSV.new("h1,h2")
+ end
+
+ def test_flock
+ assert_raise(NotImplementedError) do
+ @csv.flock(File::LOCK_EX)
+ end
+ end
+
+ def test_ioctl
+ assert_raise(NotImplementedError) do
+ @csv.ioctl(0)
+ end
+ end
+
+ def test_stat
+ assert_raise(NotImplementedError) do
+ @csv.stat
+ end
+ end
+
+ def test_to_i
+ assert_raise(NotImplementedError) do
+ @csv.to_i
+ end
+ end
+
+ def test_binmode?
+ assert_equal(false, @csv.binmode?)
+ end
+
+ def test_path
+ assert_equal(nil, @csv.path)
+ end
+
+ def test_to_io
+ assert_instance_of(StringIO, @csv.to_io)
+ end
+ end
+end
diff --git a/test/csv/interface/test_read.rb b/test/csv/interface/test_read.rb
new file mode 100644
index 00000000000..393619b7631
--- /dev/null
+++ b/test/csv/interface/test_read.rb
@@ -0,0 +1,277 @@
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVInterfaceRead < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def setup
+ super
+ @data = ""
+ @data << "1\t2\t3\r\n"
+ @data << "4\t5\r\n"
+ @input = Tempfile.new(["interface-read", ".csv"], options: {binmode: true})
+ @input << @data
+ @input.rewind
+ @rows = [
+ ["1", "2", "3"],
+ ["4", "5"],
+ ]
+ end
+
+ def teardown
+ @input.close(true)
+ super
+ end
+
+ def test_foreach
+ rows = []
+ CSV.foreach(@input.path, col_sep: "\t", row_sep: "\r\n").each do |row|
+ rows << row
+ end
+ assert_equal(@rows, rows)
+ end
+
+ def test_foreach_mode
+ rows = []
+ CSV.foreach(@input.path, "r", col_sep: "\t", row_sep: "\r\n").each do |row|
+ rows << row
+ end
+ assert_equal(@rows, rows)
+ end
+
+ def test_foreach_enumurator
+ rows = CSV.foreach(@input.path, col_sep: "\t", row_sep: "\r\n").to_a
+ assert_equal(@rows, rows)
+ end
+
+ def test_closed?
+ csv = CSV.open(@input.path, "r+", col_sep: "\t", row_sep: "\r\n")
+ assert_not_predicate(csv, :closed?)
+ csv.close
+ assert_predicate(csv, :closed?)
+ end
+
+ def test_open_auto_close
+ csv = nil
+ CSV.open(@input.path) do |_csv|
+ csv = _csv
+ end
+ assert_predicate(csv, :closed?)
+ end
+
+ def test_open_closed
+ csv = nil
+ CSV.open(@input.path) do |_csv|
+ csv = _csv
+ csv.close
+ end
+ assert_predicate(csv, :closed?)
+ end
+
+ def test_open_block_return_value
+ return_value = CSV.open(@input.path) do
+ "Return value."
+ end
+ assert_equal("Return value.", return_value)
+ end
+
+ def test_open_encoding_valid
+ # U+1F600 GRINNING FACE
+ # U+1F601 GRINNING FACE WITH SMILING EYES
+ File.open(@input.path, "w") do |file|
+ file << "\u{1F600},\u{1F601}"
+ end
+ CSV.open(@input.path, encoding: "utf-8") do |csv|
+ assert_equal([["\u{1F600}", "\u{1F601}"]],
+ csv.to_a)
+ end
+ end
+
+ def test_open_encoding_invalid
+ # U+1F600 GRINNING FACE
+ # U+1F601 GRINNING FACE WITH SMILING EYES
+ File.open(@input.path, "w") do |file|
+ file << "\u{1F600},\u{1F601}"
+ end
+ CSV.open(@input.path, encoding: "EUC-JP") do |csv|
+ error = assert_raise(CSV::MalformedCSVError) do
+ csv.shift
+ end
+ assert_equal("Invalid byte sequence in EUC-JP in line 1.",
+ error.message)
+ end
+ end
+
+ def test_open_encoding_nonexistent
+ _output, error = capture_io do
+ CSV.open(@input.path, encoding: "nonexistent") do
+ end
+ end
+ assert_equal("path:0: warning: Unsupported encoding nonexistent ignored\n",
+ error.gsub(/\A.+:\d+: /, "path:0: "))
+ end
+
+ def test_open_encoding_utf_8_with_bom
+ # U+FEFF ZERO WIDTH NO-BREAK SPACE, BOM
+ # U+1F600 GRINNING FACE
+ # U+1F601 GRINNING FACE WITH SMILING EYES
+ File.open(@input.path, "w") do |file|
+ file << "\u{FEFF}\u{1F600},\u{1F601}"
+ end
+ CSV.open(@input.path, encoding: "bom|utf-8") do |csv|
+ assert_equal([["\u{1F600}", "\u{1F601}"]],
+ csv.to_a)
+ end
+ end
+
+ def test_parse
+ assert_equal(@rows,
+ CSV.parse(@data, col_sep: "\t", row_sep: "\r\n"))
+ end
+
+ def test_parse_block
+ rows = []
+ CSV.parse(@data, col_sep: "\t", row_sep: "\r\n") do |row|
+ rows << row
+ end
+ assert_equal(@rows, rows)
+ end
+
+ def test_parse_enumerator
+ rows = CSV.parse(@data, col_sep: "\t", row_sep: "\r\n").to_a
+ assert_equal(@rows, rows)
+ end
+
+ def test_parse_headers_only
+ table = CSV.parse("a,b,c", headers: true)
+ assert_equal([
+ ["a", "b", "c"],
+ [],
+ ],
+ [
+ table.headers,
+ table.each.to_a,
+ ])
+ end
+
+ def test_parse_line
+ assert_equal(["1", "2", "3"],
+ CSV.parse_line("1;2;3", col_sep: ";"))
+ end
+
+ def test_parse_line_shortcut
+ assert_equal(["1", "2", "3"],
+ "1;2;3".parse_csv(col_sep: ";"))
+ end
+
+ def test_parse_line_empty
+ assert_equal(nil, CSV.parse_line("")) # to signal eof
+ end
+
+ def test_parse_line_empty_line
+ assert_equal([], CSV.parse_line("\n1,2,3"))
+ end
+
+ def test_read
+ assert_equal(@rows,
+ CSV.read(@input.path, col_sep: "\t", row_sep: "\r\n"))
+ end
+
+ def test_readlines
+ assert_equal(@rows,
+ CSV.readlines(@input.path, col_sep: "\t", row_sep: "\r\n"))
+ end
+
+ def test_open_read
+ rows = CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv|
+ csv.read
+ end
+ assert_equal(@rows, rows)
+ end
+
+ def test_open_readlines
+ rows = CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv|
+ csv.readlines
+ end
+ assert_equal(@rows, rows)
+ end
+
+ def test_table
+ table = CSV.table(@input.path, col_sep: "\t", row_sep: "\r\n")
+ assert_equal(CSV::Table.new([
+ CSV::Row.new([:"1", :"2", :"3"], [4, 5, nil]),
+ ]),
+ table)
+ end
+
+ def test_shift # aliased as gets() and readline()
+ CSV.open(@input.path, "rb+", col_sep: "\t", row_sep: "\r\n") do |csv|
+ rows = [
+ csv.shift,
+ csv.shift,
+ csv.shift,
+ ]
+ assert_equal(@rows + [nil],
+ rows)
+ end
+ end
+
+ def test_enumerator
+ CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv|
+ assert_equal(@rows, csv.each.to_a)
+ end
+ end
+
+ def test_shift_and_each
+ CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv|
+ rows = []
+ rows << csv.shift
+ rows.concat(csv.each.to_a)
+ assert_equal(@rows, rows)
+ end
+ end
+
+ def test_each_twice
+ CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv|
+ assert_equal([
+ @rows,
+ [],
+ ],
+ [
+ csv.each.to_a,
+ csv.each.to_a,
+ ])
+ end
+ end
+
+ def test_eof?
+ eofs = []
+ CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv|
+ eofs << csv.eof?
+ csv.shift
+ eofs << csv.eof?
+ csv.shift
+ eofs << csv.eof?
+ end
+ assert_equal([false, false, true],
+ eofs)
+ end
+
+ def test_new_nil
+ assert_raise_with_message ArgumentError, "Cannot parse nil as CSV" do
+ CSV.new(nil)
+ end
+ end
+
+ def test_options_not_modified
+ options = {}.freeze
+ CSV.foreach(@input.path, options)
+ CSV.open(@input.path, options) {}
+ CSV.parse("", options)
+ CSV.parse_line("", options)
+ CSV.read(@input.path, options)
+ CSV.readlines(@input.path, options)
+ CSV.table(@input.path, options)
+ end
+end
diff --git a/test/csv/interface/test_read_write.rb b/test/csv/interface/test_read_write.rb
new file mode 100644
index 00000000000..9ce3553d61b
--- /dev/null
+++ b/test/csv/interface/test_read_write.rb
@@ -0,0 +1,51 @@
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVInterfaceReadWrite < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def test_filter
+ rows = [[1, 2, 3], [4, 5]]
+ input = <<-CSV
+1;2;3
+4;5
+ CSV
+ output = ""
+ CSV.filter(input, output,
+ in_col_sep: ";",
+ out_col_sep: ",",
+ converters: :all) do |row|
+ row.map! {|n| n * 2}
+ row << "Added\r"
+ end
+ assert_equal(<<-CSV, output)
+2,4,6,"Added\r"
+8,10,"Added\r"
+ CSV
+ end
+
+ def test_instance_same
+ data = ""
+ assert_equal(CSV.instance(data, col_sep: ";").object_id,
+ CSV.instance(data, col_sep: ";").object_id)
+ end
+
+ def test_instance_append
+ output = ""
+ CSV.instance(output, col_sep: ";") << ["a", "b", "c"]
+ assert_equal(<<-CSV, output)
+a;b;c
+ CSV
+ CSV.instance(output, col_sep: ";") << [1, 2, 3]
+ assert_equal(<<-CSV, output)
+a;b;c
+1;2;3
+ CSV
+ end
+
+ def test_instance_shortcut
+ assert_equal(CSV.instance,
+ CSV {|csv| csv})
+ end
+end
diff --git a/test/csv/interface/test_write.rb b/test/csv/interface/test_write.rb
new file mode 100644
index 00000000000..8511204ef0c
--- /dev/null
+++ b/test/csv/interface/test_write.rb
@@ -0,0 +1,174 @@
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVInterfaceWrite < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def setup
+ super
+ @output = Tempfile.new(["interface-write", ".csv"])
+ end
+
+ def teardown
+ @output.close(true)
+ super
+ end
+
+ def test_generate_default
+ csv_text = CSV.generate do |csv|
+ csv << [1, 2, 3] << [4, nil, 5]
+ end
+ assert_equal(<<-CSV, csv_text)
+1,2,3
+4,,5
+ CSV
+ end
+
+ def test_generate_append
+ csv_text = <<-CSV
+1,2,3
+4,,5
+ CSV
+ CSV.generate(csv_text) do |csv|
+ csv << ["last", %Q{"row"}]
+ end
+ assert_equal(<<-CSV, csv_text)
+1,2,3
+4,,5
+last,"""row"""
+ CSV
+ end
+
+ def test_generate_no_new_line
+ csv_text = CSV.generate("test") do |csv|
+ csv << ["row"]
+ end
+ assert_equal(<<-CSV, csv_text)
+testrow
+ CSV
+ end
+
+ def test_generate_line_col_sep
+ line = CSV.generate_line(["1", "2", "3"], col_sep: ";")
+ assert_equal(<<-LINE, line)
+1;2;3
+ LINE
+ end
+
+ def test_generate_line_row_sep
+ line = CSV.generate_line(["1", "2"], row_sep: nil)
+ assert_equal(<<-LINE.chomp, line)
+1,2
+ LINE
+ end
+
+ def test_generate_line_shortcut
+ line = ["1", "2", "3"].to_csv(col_sep: ";")
+ assert_equal(<<-LINE, line)
+1;2;3
+ LINE
+ end
+
+ def test_headers_detection
+ headers = ["a", "b", "c"]
+ CSV.open(@output.path, "w", headers: true) do |csv|
+ csv << headers
+ csv << ["1", "2", "3"]
+ assert_equal(headers, csv.headers)
+ end
+ end
+
+ def test_lineno
+ CSV.open(@output.path, "w") do |csv|
+ n_lines = 20
+ n_lines.times do
+ csv << ["a", "b", "c"]
+ end
+ assert_equal(n_lines, csv.lineno)
+ end
+ end
+
+ def test_append_row
+ CSV.open(@output.path, "wb") do |csv|
+ csv <<
+ CSV::Row.new([], ["1", "2", "3"]) <<
+ CSV::Row.new([], ["a", "b", "c"])
+ end
+ assert_equal(<<-CSV, File.read(@output.path, mode: "rb"))
+1,2,3
+a,b,c
+ CSV
+ end
+
+ def test_append_hash
+ CSV.open(@output.path, "wb", headers: true) do |csv|
+ csv << [:a, :b, :c]
+ csv << {a: 1, b: 2, c: 3}
+ csv << {a: 4, b: 5, c: 6}
+ end
+ assert_equal(<<-CSV, File.read(@output.path, mode: "rb"))
+a,b,c
+1,2,3
+4,5,6
+ CSV
+ end
+
+ def test_append_hash_headers_array
+ CSV.open(@output.path, "wb", headers: [:b, :a, :c]) do |csv|
+ csv << {a: 1, b: 2, c: 3}
+ csv << {a: 4, b: 5, c: 6}
+ end
+ assert_equal(<<-CSV, File.read(@output.path, mode: "rb"))
+2,1,3
+5,4,6
+ CSV
+ end
+
+ def test_append_hash_headers_string
+ CSV.open(@output.path, "wb", headers: "b|a|c", col_sep: "|") do |csv|
+ csv << {"a" => 1, "b" => 2, "c" => 3}
+ csv << {"a" => 4, "b" => 5, "c" => 6}
+ end
+ assert_equal(<<-CSV, File.read(@output.path, mode: "rb"))
+2|1|3
+5|4|6
+ CSV
+ end
+
+ def test_write_headers
+ CSV.open(@output.path,
+ "wb",
+ headers: "b|a|c",
+ write_headers: true,
+ col_sep: "|" ) do |csv|
+ csv << {"a" => 1, "b" => 2, "c" => 3}
+ csv << {"a" => 4, "b" => 5, "c" => 6}
+ end
+ assert_equal(<<-CSV, File.read(@output.path, mode: "rb"))
+b|a|c
+2|1|3
+5|4|6
+ CSV
+ end
+
+ def test_write_headers_empty
+ CSV.open(@output.path,
+ "wb",
+ headers: "b|a|c",
+ write_headers: true,
+ col_sep: "|" ) do |csv|
+ end
+ assert_equal(<<-CSV, File.read(@output.path, mode: "rb"))
+b|a|c
+ CSV
+ end
+
+ def test_options_not_modified
+ options = {}.freeze
+ CSV.generate(options) {}
+ CSV.generate_line([], options)
+ CSV.filter("", "", options)
+ CSV.instance("", options)
+ end
+end
diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb
index 2f235f16f6c..49222c7159a 100644
--- a/test/csv/parse/test_general.rb
+++ b/test/csv/parse/test_general.rb
@@ -142,7 +142,7 @@ class TestCSVParseGeneral < Test::Unit::TestCase
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse_line("1,2\r,3", row_sep: "\n")
end
- assert_equal("Unquoted fields do not allow \\r or \\n in line 1.",
+ assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 1.",
error.message)
end
@@ -158,7 +158,7 @@ line,5,jkl
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse(csv)
end
- assert_equal("Unquoted fields do not allow \\r or \\n in line 4.",
+ assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 4.",
error.message)
end
diff --git a/test/csv/parse/test_invalid.rb b/test/csv/parse/test_invalid.rb
new file mode 100644
index 00000000000..b84707c2cc1
--- /dev/null
+++ b/test/csv/parse/test_invalid.rb
@@ -0,0 +1,36 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseInvalid < Test::Unit::TestCase
+ def test_no_column_mixed_new_lines
+ error = assert_raise(CSV::MalformedCSVError) do
+ CSV.parse("\n" +
+ "\r")
+ end
+ assert_equal("New line must be <\"\\n\"> not <\"\\r\"> in line 2.",
+ error.message)
+ end
+
+ def test_ignore_invalid_line
+ csv = CSV.new(<<-CSV, headers: true, return_headers: true)
+head1,head2,head3
+aaa,bbb,ccc
+ddd,ee"e.fff
+ggg,hhh,iii
+ CSV
+ headers = ["head1", "head2", "head3"]
+ assert_equal(CSV::Row.new(headers, headers),
+ csv.shift)
+ assert_equal(CSV::Row.new(headers, ["aaa", "bbb", "ccc"]),
+ csv.shift)
+ error = assert_raise(CSV::MalformedCSVError) do
+ csv.shift
+ end
+ assert_equal("Illegal quoting in line 3.",
+ error.message)
+ assert_equal(CSV::Row.new(headers, ["ggg", "hhh", "iii"]),
+ csv.shift)
+ end
+end
diff --git a/test/csv/parse/test_liberal_parsing.rb b/test/csv/parse/test_liberal_parsing.rb
index 22b1689a37e..2f7b34689f1 100644
--- a/test/csv/parse/test_liberal_parsing.rb
+++ b/test/csv/parse/test_liberal_parsing.rb
@@ -22,8 +22,7 @@ class TestCSVParseLiberalParsing < Test::Unit::TestCase
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse_line(input)
end
- assert_equal("Do not allow except col_sep_split_separator " +
- "after quoted fields in line 1.",
+ assert_equal("Any value after quoted field isn't allowed in line 1.",
error.message)
assert_equal(['"quoted" field'],
CSV.parse_line(input, liberal_parsing: true))
@@ -75,8 +74,7 @@ class TestCSVParseLiberalParsing < Test::Unit::TestCase
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse(data)
end
- assert_equal("Do not allow except col_sep_split_separator " +
- "after quoted fields in line 1.",
+ assert_equal("Any value after quoted field isn't allowed in line 1.",
error.message)
assert_equal([
[["a", %Q{""b""}]],
@@ -90,4 +88,73 @@ class TestCSVParseLiberalParsing < Test::Unit::TestCase
}),
])
end
+
+ class TestBackslashQuote < Test::Unit::TestCase
+ extend ::DifferentOFS
+
+ def test_double_quote_outside_quote
+ data = %Q{a,""b""}
+ assert_equal([
+ [["a", %Q{""b""}]],
+ [["a", %Q{"b"}]],
+ ],
+ [
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true
+ }),
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true,
+ double_quote_outside_quote: true
+ }),
+ ])
+ end
+
+ def test_unquoted_value
+ data = %q{\"\"a\"\"}
+ assert_equal([
+ [[%q{\"\"a\"\"}]],
+ [[%q{""a""}]],
+ ],
+ [
+ CSV.parse(data, liberal_parsing: true),
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true
+ }),
+ ])
+ end
+
+ def test_unquoted_value_multiple_characters_col_sep
+ data = %q{a<\\"b<=>x}
+ assert_equal([[%Q{a<"b}, "x"]],
+ CSV.parse(data,
+ col_sep: "<=>",
+ liberal_parsing: {
+ backslash_quote: true
+ }))
+ end
+
+ def test_quoted_value
+ data = %q{"\"\"a\"\""}
+ assert_equal([
+ [[%q{"\"\"a\"\""}]],
+ [[%q{""a""}]],
+ [[%q{""a""}]],
+ ],
+ [
+ CSV.parse(data, liberal_parsing: true),
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true
+ }),
+ CSV.parse(data,
+ liberal_parsing: {
+ backslash_quote: true,
+ double_quote_outside_quote: true
+ }),
+ ])
+ end
+ end
end
diff --git a/test/csv/parse/test_quote_char_nil.rb b/test/csv/parse/test_quote_char_nil.rb
new file mode 100644
index 00000000000..fc3b646759b
--- /dev/null
+++ b/test/csv/parse/test_quote_char_nil.rb
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseQuoteCharNil < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def test_full
+ assert_equal(["a", "b"], CSV.parse_line(%Q{a,b}, quote_char: nil))
+ end
+
+ def test_end_with_nil
+ assert_equal(["a", nil, nil, nil], CSV.parse_line(%Q{a,,,}, quote_char: nil))
+ end
+
+ def test_nil_nil
+ assert_equal([nil, nil], CSV.parse_line(%Q{,}, quote_char: nil))
+ end
+
+ def test_unquoted_value_multiple_characters_col_sep
+ data = %q{a<b<=>x}
+ assert_equal([[%Q{a<b}, "x"]], CSV.parse(data, col_sep: "<=>", quote_char: nil))
+ end
+
+ def test_csv_header_string
+ data = <<~DATA
+ first,second,third
+ A,B,C
+ 1,2,3
+ DATA
+ assert_equal(
+ CSV::Table.new([
+ CSV::Row.new(["my", "new", "headers"], ["first", "second", "third"]),
+ CSV::Row.new(["my", "new", "headers"], ["A", "B", "C"]),
+ CSV::Row.new(["my", "new", "headers"], ["1", "2", "3"])
+ ]),
+ CSV.parse(data, headers: "my,new,headers", quote_char: nil)
+ )
+ end
+
+ def test_comma
+ assert_equal([["a", "b", nil, "d"]],
+ CSV.parse("a,b,,d", col_sep: ",", quote_char: nil))
+ end
+
+ def test_space
+ assert_equal([["a", "b", nil, "d"]],
+ CSV.parse("a b  d", col_sep: " ", quote_char: nil))
+ end
+
+ def encode_array(array, encoding)
+ array.collect do |element|
+ element ? element.encode(encoding) : element
+ end
+ end
+
+ def test_space_no_ascii
+ encoding = Encoding::UTF_16LE
+ assert_equal([encode_array(["a", "b", nil, "d"], encoding)],
+ CSV.parse("a b  d".encode(encoding),
+ col_sep: " ".encode(encoding),
+ quote_char: nil))
+ end
+
+ def test_multiple_space
+ assert_equal([["a b", nil, "d"]],
+ CSV.parse("a b    d", col_sep: "  ", quote_char: nil))
+ end
+
+ def test_multiple_characters_leading_empty_fields
+ data = <<-CSV
+<=><=>A<=>B<=>C
+1<=>2<=>3
+ CSV
+ assert_equal([
+ [nil, nil, "A", "B", "C"],
+ ["1", "2", "3"],
+ ],
+ CSV.parse(data, col_sep: "<=>", quote_char: nil))
+ end
+
+ def test_line
+ lines = [
+ "abc,def\n",
+ ]
+ csv = CSV.new(lines.join(""), quote_char: nil)
+ lines.each do |line|
+ csv.shift
+ assert_equal(line, csv.line)
+ end
+ end
+end
diff --git a/test/csv/parse/test_row_separator.rb b/test/csv/parse/test_row_separator.rb
new file mode 100644
index 00000000000..eaf6adc9101
--- /dev/null
+++ b/test/csv/parse/test_row_separator.rb
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseRowSeparator < Test::Unit::TestCase
+ extend DifferentOFS
+ include Helper
+
+ def test_multiple_characters
+ with_chunk_size("1") do
+ assert_equal([["a"], ["b"]],
+ CSV.parse("a\r\nb\r\n", row_sep: "\r\n"))
+ end
+ end
+end
diff --git a/test/csv/parse/test_skip_lines.rb b/test/csv/parse/test_skip_lines.rb
new file mode 100644
index 00000000000..196858f1b07
--- /dev/null
+++ b/test/csv/parse/test_skip_lines.rb
@@ -0,0 +1,105 @@
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseSkipLines < Test::Unit::TestCase
+ extend DifferentOFS
+ include Helper
+
+ def test_default
+ csv = CSV.new("a,b,c\n")
+ assert_nil(csv.skip_lines)
+ end
+
+ def test_regexp
+ csv = <<-CSV
+1
+#2
+ #3
+4
+ CSV
+ assert_equal([
+ ["1"],
+ ["4"],
+ ],
+ CSV.parse(csv, :skip_lines => /\A\s*#/))
+ end
+
+ def test_regexp_quoted
+ csv = <<-CSV
+1
+#2
+"#3"
+4
+ CSV
+ assert_equal([
+ ["1"],
+ ["#3"],
+ ["4"],
+ ],
+ CSV.parse(csv, :skip_lines => /\A\s*#/))
+ end
+
+ def test_string
+ csv = <<-CSV
+1
+.2
+3.
+4
+ CSV
+ assert_equal([
+ ["1"],
+ ["4"],
+ ],
+ CSV.parse(csv, :skip_lines => "."))
+ end
+
+ class RegexStub
+ end
+
+ def test_not_matchable
+ regex_stub = RegexStub.new
+ csv = CSV.new("1\n", :skip_lines => regex_stub)
+ error = assert_raise(ArgumentError) do
+ csv.shift
+ end
+ assert_equal(":skip_lines has to respond to #match: #{regex_stub.inspect}",
+ error.message)
+ end
+
+ class Matchable
+ def initialize(pattern)
+ @pattern = pattern
+ end
+
+ def match(line)
+ @pattern.match(line)
+ end
+ end
+
+ def test_matchable
+ csv = <<-CSV
+1
+# 2
+3
+# 4
+ CSV
+ assert_equal([
+ ["1"],
+ ["3"],
+ ],
+ CSV.parse(csv, :skip_lines => Matchable.new(/\A#/)))
+ end
+
+ def test_multibyte_data
+ # U+3042 HIRAGANA LETTER A
+ # U+3044 HIRAGANA LETTER I
+ # U+3046 HIRAGANA LETTER U
+ value = "\u3042\u3044\u3046"
+ with_chunk_size("5") do
+ assert_equal([[value], [value]],
+ CSV.parse("#{value}\n#{value}\n",
+ :skip_lines => /\A#/))
+ end
+ end
+end
diff --git a/test/csv/parse/test_strip.rb b/test/csv/parse/test_strip.rb
new file mode 100644
index 00000000000..160407bd94f
--- /dev/null
+++ b/test/csv/parse/test_strip.rb
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+class TestCSVParseStrip < Test::Unit::TestCase
+ extend DifferentOFS
+
+ def test_both
+ assert_equal(["a", "b"],
+ CSV.parse_line(%Q{ a , b }, strip: true))
+ end
+
+ def test_left
+ assert_equal(["a", "b"],
+ CSV.parse_line(%Q{ a, b}, strip: true))
+ end
+
+ def test_right
+ assert_equal(["a", "b"],
+ CSV.parse_line(%Q{a ,b }, strip: true))
+ end
+
+ def test_quoted
+ assert_equal([" a ", " b "],
+ CSV.parse_line(%Q{" a "," b "}, strip: true))
+ end
+
+ def test_liberal_parsing
+ assert_equal([" a ", "b", " c ", " d "],
+ CSV.parse_line(%Q{" a ", b , " c "," d " },
+ strip: true,
+ liberal_parsing: true))
+ end
+
+ def test_string
+ assert_equal(["a", " b"],
+ CSV.parse_line(%Q{ a , " b" },
+ strip: " "))
+ end
+
+ def test_no_quote
+ assert_equal([" a ", " b "],
+ CSV.parse_line(%Q{" a ", b },
+ strip: %Q{"},
+ quote_char: nil))
+ end
+end
diff --git a/test/csv/test_encodings.rb b/test/csv/test_encodings.rb
index 01101f1e099..64ea36a9a4d 100755
--- a/test/csv/test_encodings.rb
+++ b/test/csv/test_encodings.rb
@@ -256,12 +256,13 @@ class TestCSVEncodings < Test::Unit::TestCase
end
def test_invalid_encoding_row_error
- csv = CSV.new("invalid,\xF8\r\nvalid,x\r\n".force_encoding("UTF-8"),
- encoding: "UTF-8")
+ csv = CSV.new("valid,x\rinvalid,\xF8\r".force_encoding("UTF-8"),
+ encoding: "UTF-8", row_sep: "\r")
error = assert_raise(CSV::MalformedCSVError) do
csv.shift
+ csv.shift
end
- assert_equal("Invalid byte sequence in UTF-8 in line 1.",
+ assert_equal("Invalid byte sequence in UTF-8 in line 2.",
error.message)
end
@@ -270,9 +271,9 @@ class TestCSVEncodings < Test::Unit::TestCase
def assert_parses(fields, encoding, options = { })
encoding = Encoding.find(encoding) unless encoding.is_a? Encoding
orig_fields = fields
- fields = encode_ary(fields, encoding)
+ fields = encode_ary(fields, encoding)
data = ary_to_data(fields, options)
- parsed = CSV.parse(data, options)
+ parsed = CSV.parse(data, options)
assert_equal(fields, parsed)
parsed.flatten.each_with_index do |field, i|
assert_equal(encoding, field.encoding, "Field[#{i + 1}] was transcoded.")
diff --git a/test/csv/test_features.rb b/test/csv/test_features.rb
index 0b92776026f..306b880f6fb 100755
--- a/test/csv/test_features.rb
+++ b/test/csv/test_features.rb
@@ -56,7 +56,7 @@ line,4,jkl
error = assert_raise(CSV::MalformedCSVError) do
CSV.parse_line("1,2,3\n,4,5\r\n", row_sep: "\r\n")
end
- assert_equal("Unquoted fields do not allow \\r or \\n in line 1.",
+ assert_equal("Unquoted fields do not allow new line <\"\\n\"> in line 1.",
error.message)
assert_equal( ["1", "2", "3\n", "4", "5"],
CSV.parse_line(%Q{1,2,"3\n",4,5\r\n}, row_sep: "\r\n"))
@@ -295,78 +295,6 @@ line,4,jkl
assert_match(/\A\d\.\d\.\d\z/, CSV::VERSION)
end
- def test_accepts_comment_skip_lines_option
- assert_nothing_raised(ArgumentError) do
- CSV.new(@sample_data, :skip_lines => /\A\s*#/)
- end
- end
-
- def test_accepts_comment_defaults_to_nil
- c = CSV.new(@sample_data)
- assert_nil(c.skip_lines)
- end
-
- class RegexStub
- end
-
- def test_requires_skip_lines_to_call_match
- regex_stub = RegexStub.new
- csv = CSV.new(@sample_data, :skip_lines => regex_stub)
- assert_raise_with_message(ArgumentError, /skip_lines/) do
- csv.shift
- end
- end
-
- class Matchable
- def initialize(pattern)
- @pattern = pattern
- end
-
- def match(line)
- @pattern.match(line)
- end
- end
-
- def test_skip_lines_match
- csv = <<-CSV.chomp
-1
-# 2
-3
-# 4
- CSV
- assert_equal([["1"], ["3"]],
- CSV.parse(csv, :skip_lines => Matchable.new(/\A#/)))
- end
-
- def test_comment_rows_are_ignored
- sample_data = "line,1,a\n#not,a,line\nline,2,b\n #also,no,line"
- c = CSV.new sample_data, :skip_lines => /\A\s*#/
- assert_equal [["line", "1", "a"], ["line", "2", "b"]], c.each.to_a
- end
-
- def test_comment_rows_are_ignored_with_heredoc
- sample_data = <<~EOL
- 1,foo
- .2,bar
- 3,baz
- EOL
-
- c = CSV.new(sample_data, skip_lines: ".")
- assert_equal [["1", "foo"], ["3", "baz"]], c.each.to_a
- end
-
- def test_quoted_skip_line_markers_are_ignored
- sample_data = "line,1,a\n\"#not\",a,line\nline,2,b"
- c = CSV.new sample_data, :skip_lines => /\A\s*#/
- assert_equal [["line", "1", "a"], ["#not", "a", "line"], ["line", "2", "b"]], c.each.to_a
- end
-
- def test_string_works_like_a_regexp
- sample_data = "line,1,a\n#(not,a,line\nline,2,b\n also,#no,line"
- c = CSV.new sample_data, :skip_lines => "#"
- assert_equal [["line", "1", "a"], ["line", "2", "b"]], c.each.to_a
- end
-
def test_table_nil_equality
assert_nothing_raised(NoMethodError) { CSV.parse("test", headers: true) == nil }
end
diff --git a/test/csv/test_interface.rb b/test/csv/test_interface.rb
deleted file mode 100755
index 77730fa5dbb..00000000000
--- a/test/csv/test_interface.rb
+++ /dev/null
@@ -1,450 +0,0 @@
-# -*- coding: utf-8 -*-
-# frozen_string_literal: false
-
-require_relative "helper"
-require "tempfile"
-
-class TestCSVInterface < Test::Unit::TestCase
- extend DifferentOFS
-
- def setup
- super
- @tempfile = Tempfile.new(%w"temp .csv")
- @tempfile.close
- @path = @tempfile.path
-
- File.open(@path, "wb") do |file|
- file << "1\t2\t3\r\n"
- file << "4\t5\r\n"
- end
-
- @expected = [%w{1 2 3}, %w{4 5}]
- end
-
- def teardown
- @tempfile.close(true)
- super
- end
-
- ### Test Read Interface ###
-
- def test_foreach
- CSV.foreach(@path, col_sep: "\t", row_sep: "\r\n") do |row|
- assert_equal(@expected.shift, row)
- end
- end
-
- def test_foreach_enum
- CSV.foreach(@path, col_sep: "\t", row_sep: "\r\n").zip(@expected) do |row, exp|
- assert_equal(exp, row)
- end
- end
-
- def test_open_and_close
- csv = CSV.open(@path, "r+", col_sep: "\t", row_sep: "\r\n")
- assert_not_nil(csv)
- assert_instance_of(CSV, csv)
- assert_not_predicate(csv, :closed?)
- csv.close
- assert_predicate(csv, :closed?)
-
- ret = CSV.open(@path) do |new_csv|
- csv = new_csv
- assert_instance_of(CSV, new_csv)
- "Return value."
- end
- assert_predicate(csv, :closed?)
- assert_equal("Return value.", ret)
- end
-
- def test_open_encoding_valid
- # U+1F600 GRINNING FACE
- # U+1F601 GRINNING FACE WITH SMILING EYES
- File.open(@path, "w") do |file|
- file << "\u{1F600},\u{1F601}"
- end
- CSV.open(@path, encoding: "utf-8") do |csv|
- assert_equal([["\u{1F600}", "\u{1F601}"]],
- csv.to_a)
- end
- end
-
- def test_open_encoding_invalid
- # U+1F600 GRINNING FACE
- # U+1F601 GRINNING FACE WITH SMILING EYES
- File.open(@path, "w") do |file|
- file << "\u{1F600},\u{1F601}"
- end
- CSV.open(@path, encoding: "EUC-JP") do |csv|
- error = assert_raise(CSV::MalformedCSVError) do
- csv.shift
- end
- assert_equal("Invalid byte sequence in EUC-JP in line 1.",
- error.message)
- end
- end
-
- def test_open_encoding_nonexistent
- _output, error = capture_io do
- CSV.open(@path, encoding: "nonexistent") do
- end
- end
- assert_equal("path:0: warning: Unsupported encoding nonexistent ignored\n",
- error.gsub(/\A.+:\d+: /, "path:0: "))
- end
-
- def test_open_encoding_utf_8_with_bom
- # U+FEFF ZERO WIDTH NO-BREAK SPACE, BOM
- # U+1F600 GRINNING FACE
- # U+1F601 GRINNING FACE WITH SMILING EYES
- File.open(@path, "w") do |file|
- file << "\u{FEFF}\u{1F600},\u{1F601}"
- end
- CSV.open(@path, encoding: "bom|utf-8") do |csv|
- assert_equal([["\u{1F600}", "\u{1F601}"]],
- csv.to_a)
- end
- end
-
- def test_parse
- data = File.binread(@path)
- assert_equal( @expected,
- CSV.parse(data, col_sep: "\t", row_sep: "\r\n") )
-
- CSV.parse(data, col_sep: "\t", row_sep: "\r\n") do |row|
- assert_equal(@expected.shift, row)
- end
- end
-
- def test_parse_line
- row = CSV.parse_line("1;2;3", col_sep: ";")
- assert_not_nil(row)
- assert_instance_of(Array, row)
- assert_equal(%w{1 2 3}, row)
-
- # shortcut interface
- row = "1;2;3".parse_csv(col_sep: ";")
- assert_not_nil(row)
- assert_instance_of(Array, row)
- assert_equal(%w{1 2 3}, row)
- end
-
- def test_parse_line_with_empty_lines
- assert_equal(nil, CSV.parse_line("")) # to signal eof
- assert_equal(Array.new, CSV.parse_line("\n1,2,3"))
- end
-
- def test_parse_header_only
- table = CSV.parse("a,b,c", headers: true)
- assert_equal([
- ["a", "b", "c"],
- [],
- ],
- [
- table.headers,
- table.each.to_a,
- ])
- end
-
- def test_read_and_readlines
- assert_equal( @expected,
- CSV.read(@path, col_sep: "\t", row_sep: "\r\n") )
- assert_equal( @expected,
- CSV.readlines(@path, col_sep: "\t", row_sep: "\r\n") )
-
-
- data = CSV.open(@path, col_sep: "\t", row_sep: "\r\n") do |csv|
- csv.read
- end
- assert_equal(@expected, data)
- data = CSV.open(@path, col_sep: "\t", row_sep: "\r\n") do |csv|
- csv.readlines
- end
- assert_equal(@expected, data)
- end
-
- def test_table
- table = CSV.table(@path, col_sep: "\t", row_sep: "\r\n")
- assert_instance_of(CSV::Table, table)
- assert_equal([[:"1", :"2", :"3"], [4, 5, nil]], table.to_a)
- end
-
- def test_shift # aliased as gets() and readline()
- CSV.open(@path, "rb+", col_sep: "\t", row_sep: "\r\n") do |csv|
- assert_equal(@expected.shift, csv.shift)
- assert_equal(@expected.shift, csv.shift)
- assert_equal(nil, csv.shift)
- end
- end
-
- def test_enumerators_are_supported
- CSV.open(@path, col_sep: "\t", row_sep: "\r\n") do |csv|
- enum = csv.each
- assert_instance_of(Enumerator, enum)
- assert_equal(@expected.shift, enum.next)
- end
- end
-
- def test_nil_is_not_acceptable
- assert_raise_with_message ArgumentError, "Cannot parse nil as CSV" do
- CSV.new(nil)
- end
- end
-
- def test_open_handles_prematurely_closed_file_descriptor_gracefully
- assert_nothing_raised(Exception) do
- CSV.open(@path) do |csv|
- csv.close
- end
- end
- end
-
- ### Test Write Interface ###
-
- def test_generate
- str = CSV.generate do |csv| # default empty String
- assert_instance_of(CSV, csv)
- assert_equal(csv, csv << [1, 2, 3])
- assert_equal(csv, csv << [4, nil, 5])
- end
- assert_not_nil(str)
- assert_instance_of(String, str)
- assert_equal("1,2,3\n4,,5\n", str)
-
- CSV.generate(str) do |csv| # appending to a String
- assert_equal(csv, csv << ["last", %Q{"row"}])
- end
- assert_equal(%Q{1,2,3\n4,,5\nlast,"""row"""\n}, str)
-
- out = CSV.generate("test") { |csv| csv << ["row"] }
- assert_equal("testrow\n", out)
- end
-
- def test_generate_line
- line = CSV.generate_line(%w{1 2 3}, col_sep: ";")
- assert_not_nil(line)
- assert_instance_of(String, line)
- assert_equal("1;2;3\n", line)
-
- # shortcut interface
- line = %w{1 2 3}.to_csv(col_sep: ";")
- assert_not_nil(line)
- assert_instance_of(String, line)
- assert_equal("1;2;3\n", line)
-
- line = CSV.generate_line(%w"1 2", row_sep: nil)
- assert_equal("1,2", line)
- end
-
- def test_write_header_detection
- File.unlink(@path)
-
- headers = %w{a b c}
- CSV.open(@path, "w", headers: true) do |csv|
- csv << headers
- csv << %w{1 2 3}
- assert_equal(headers, csv.headers)
- end
- end
-
- def test_write_lineno
- File.unlink(@path)
-
- CSV.open(@path, "w") do |csv|
- lines = 20
- lines.times { csv << %w{a b c} }
- assert_equal(lines, csv.lineno)
- end
- end
-
- def test_write_hash
- File.unlink(@path)
-
- lines = [{a: 1, b: 2, c: 3}, {a: 4, b: 5, c: 6}]
- CSV.open( @path, "wb", headers: true,
- header_converters: :symbol ) do |csv|
- csv << lines.first.keys
- lines.each { |line| csv << line }
- end
- CSV.open( @path, "rb", headers: true,
- converters: :all,
- header_converters: :symbol ) do |csv|
- csv.each { |line| assert_equal(lines.shift, line.to_hash) }
- end
- end
-
- def test_write_hash_with_string_keys
- File.unlink(@path)
-
- lines = [{a: 1, b: 2, c: 3}, {a: 4, b: 5, c: 6}]
- CSV.open( @path, "wb", headers: true ) do |csv|
- csv << lines.first.keys
- lines.each { |line| csv << line }
- end
- CSV.open( @path, "rb", headers: true ) do |csv|
- csv.each do |line|
- csv.headers.each_with_index do |header, h|
- keys = line.to_hash.keys
- assert_instance_of(String, keys[h])
- assert_same(header, keys[h])
- end
- end
- end
- end
-
- def test_write_hash_with_headers_array
- File.unlink(@path)
-
- lines = [{a: 1, b: 2, c: 3}, {a: 4, b: 5, c: 6}]
- CSV.open(@path, "wb", headers: [:b, :a, :c]) do |csv|
- lines.each { |line| csv << line }
- end
-
- # test writing fields in the correct order
- File.open(@path, "rb") do |f|
- assert_equal("2,1,3", f.gets.strip)
- assert_equal("5,4,6", f.gets.strip)
- end
-
- # test reading CSV with headers
- CSV.open( @path, "rb", headers: [:b, :a, :c],
- converters: :all ) do |csv|
- csv.each { |line| assert_equal(lines.shift, line.to_hash) }
- end
- end
-
- def test_write_hash_with_headers_string
- File.unlink(@path)
-
- lines = [{"a" => 1, "b" => 2, "c" => 3}, {"a" => 4, "b" => 5, "c" => 6}]
- CSV.open(@path, "wb", headers: "b|a|c", col_sep: "|") do |csv|
- lines.each { |line| csv << line }
- end
-
- # test writing fields in the correct order
- File.open(@path, "rb") do |f|
- assert_equal("2|1|3", f.gets.strip)
- assert_equal("5|4|6", f.gets.strip)
- end
-
- # test reading CSV with headers
- CSV.open( @path, "rb", headers: "b|a|c",
- col_sep: "|",
- converters: :all ) do |csv|
- csv.each { |line| assert_equal(lines.shift, line.to_hash) }
- end
- end
-
- def test_write_headers
- File.unlink(@path)
-
- lines = [{"a" => 1, "b" => 2, "c" => 3}, {"a" => 4, "b" => 5, "c" => 6}]
- CSV.open( @path, "wb", headers: "b|a|c",
- write_headers: true,
- col_sep: "|" ) do |csv|
- lines.each { |line| csv << line }
- end
-
- # test writing fields in the correct order
- File.open(@path, "rb") do |f|
- assert_equal("b|a|c", f.gets.strip)
- assert_equal("2|1|3", f.gets.strip)
- assert_equal("5|4|6", f.gets.strip)
- end
-
- # test reading CSV with headers
- CSV.open( @path, "rb", headers: true,
- col_sep: "|",
- converters: :all ) do |csv|
- csv.each { |line| assert_equal(lines.shift, line.to_hash) }
- end
- end
-
- def test_write_headers_empty
- File.unlink(@path)
-
- CSV.open( @path, "wb", headers: "b|a|c",
- write_headers: true,
- col_sep: "|" ) do |csv|
- end
-
- File.open(@path, "rb") do |f|
- assert_equal("b|a|c", f.gets.strip)
- end
- end
-
- def test_append # aliased add_row() and puts()
- File.unlink(@path)
-
- CSV.open(@path, "wb", col_sep: "\t", row_sep: "\r\n") do |csv|
- @expected.each { |row| csv << row }
- end
-
- test_shift
-
- # same thing using CSV::Row objects
- File.unlink(@path)
-
- CSV.open(@path, "wb", col_sep: "\t", row_sep: "\r\n") do |csv|
- @expected.each { |row| csv << CSV::Row.new(Array.new, row) }
- end
-
- test_shift
- end
-
- ### Test Read and Write Interface ###
-
- def test_filter
- assert_respond_to(CSV, :filter)
-
- expected = [[1, 2, 3], [4, 5]]
- CSV.filter( "1;2;3\n4;5\n", (result = String.new),
- in_col_sep: ";", out_col_sep: ",",
- converters: :all ) do |row|
- assert_equal(row, expected.shift)
- row.map! { |n| n * 2 }
- row << "Added\r"
- end
- assert_equal("2,4,6,\"Added\r\"\n8,10,\"Added\r\"\n", result)
- end
-
- def test_instance
- csv = String.new
-
- first = nil
- assert_nothing_raised(Exception) do
- first = CSV.instance(csv, col_sep: ";")
- first << %w{a b c}
- end
-
- assert_equal("a;b;c\n", csv)
-
- second = nil
- assert_nothing_raised(Exception) do
- second = CSV.instance(csv, col_sep: ";")
- second << [1, 2, 3]
- end
-
- assert_equal(first.object_id, second.object_id)
- assert_equal("a;b;c\n1;2;3\n", csv)
-
- # shortcuts
- assert_equal(STDOUT, CSV.instance.instance_eval { @io })
- assert_equal(STDOUT, CSV { |new_csv| new_csv.instance_eval { @io } })
- end
-
- def test_options_are_not_modified
- opt = {}.freeze
- assert_nothing_raised { CSV.foreach(@path, opt) }
- assert_nothing_raised { CSV.open(@path, opt){} }
- assert_nothing_raised { CSV.parse("", opt) }
- assert_nothing_raised { CSV.parse_line("", opt) }
- assert_nothing_raised { CSV.read(@path, opt) }
- assert_nothing_raised { CSV.readlines(@path, opt) }
- assert_nothing_raised { CSV.table(@path, opt) }
- assert_nothing_raised { CSV.generate(opt){} }
- assert_nothing_raised { CSV.generate_line([], opt) }
- assert_nothing_raised { CSV.filter("", "", opt){} }
- assert_nothing_raised { CSV.instance("", opt) }
- end
-end
diff --git a/test/csv/write/test_converters.rb b/test/csv/write/test_converters.rb
new file mode 100644
index 00000000000..a93b1040ac6
--- /dev/null
+++ b/test/csv/write/test_converters.rb
@@ -0,0 +1,53 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: false
+
+require_relative "../helper"
+
+module TestCSVWriteConverters
+ def test_one
+ assert_equal(%Q[=a,=b,=c\n],
+ generate_line(["a", "b", "c"],
+ write_converters: ->(value) {"=" + value}))
+ end
+
+ def test_multiple
+ assert_equal(%Q[=a_,=b_,=c_\n],
+ generate_line(["a", "b", "c"],
+ write_converters: [
+ ->(value) {"=" + value},
+ ->(value) {value + "_"},
+ ]))
+ end
+
+ def test_nil_value
+ assert_equal(%Q[a,NaN,c\n],
+ generate_line(["a", nil, "c"],
+ write_nil_value: "NaN"))
+ end
+
+ def test_empty_value
+ assert_equal(%Q[a,,c\n],
+ generate_line(["a", "", "c"],
+ write_empty_value: nil))
+ end
+end
+
+class TestCSVWriteConvertersGenerateLine < Test::Unit::TestCase
+ include TestCSVWriteConverters
+ extend DifferentOFS
+
+ def generate_line(row, **kwargs)
+ CSV.generate_line(row, **kwargs)
+ end
+end
+
+class TestCSVWriteConvertersGenerate < Test::Unit::TestCase
+ include TestCSVWriteConverters
+ extend DifferentOFS
+
+ def generate_line(row, **kwargs)
+ CSV.generate(**kwargs) do |csv|
+ csv << row
+ end
+ end
+end