summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author kou <kou@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2019-04-14 21:01:51 +0000
committer kou <kou@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2019-04-14 21:01:51 +0000
commit e3b6c7c7ebca1b051dbaa6f33494e92f5638fcc9 (patch)
tree f98e60ecbeb320a9e9d8bdacfa3f9231cd55ddda /lib
parent fb96811d15f83c6b692e8e00d458eef32032af6a (diff)
Import CSV 3.0.8
This includes performance improvements and backward incompatibility fixes.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67560 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib')
-rw-r--r-- lib/csv.rb 129
-rw-r--r-- lib/csv/csv.gemspec 1
-rw-r--r-- lib/csv/delete_suffix.rb 18
-rw-r--r-- lib/csv/parser.rb 502
-rw-r--r-- lib/csv/version.rb 2
-rw-r--r-- lib/csv/writer.rb 3
6 files changed, 540 insertions, 115 deletions
diff --git a/lib/csv.rb b/lib/csv.rb
index 1a173c6d68..1239554ad6 100644
--- a/lib/csv.rb
+++ b/lib/csv.rb
@@ -504,9 +504,9 @@ class CSV
# <tt>encoding: "UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file
# but transcode it to UTF-8 before CSV parses it.
#
- def self.foreach(path, **options, &block)
- return to_enum(__method__, path, options) unless block_given?
- open(path, options) do |csv|
+ def self.foreach(path, mode="r", **options, &block)
+ return to_enum(__method__, path, mode, options) unless block_given?
+ open(path, mode, options) do |csv|
csv.each(&block)
end
end
@@ -885,6 +885,10 @@ class CSV
# blank string field is replaced by
# the set object.
# <b><tt>:quote_empty</tt></b>:: TODO
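+ # <b><tt>:write_converters</tt></b>:: Converters applied to the fields of each row before it is written. The default +nil+ adds no write converters.
+ # <b><tt>:write_nil_value</tt></b>:: Passed as the +nil_value+ option of the fields converter used for writing (default +nil+); presumably the object written in place of +nil+ fields.
+ # <b><tt>:write_empty_value</tt></b>:: Passed as the +empty_value+ option of the fields converter used for writing (default <tt>""</tt>); presumably the object written in place of blank string fields.
+ # <b><tt>:strip</tt></b>:: When +true+, whitespace (space, \t, \r, \n, \f, \v) around each parsed value is stripped. When a single-character String, that character is stripped instead; an empty or longer String raises ArgumentError. The default +false+ disables stripping.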
#
# See CSV::DEFAULT_OPTIONS for the default settings.
#
@@ -911,7 +915,11 @@ class CSV
encoding: nil,
nil_value: nil,
empty_value: "",
- quote_empty: true)
+ quote_empty: true,
+ write_converters: nil,
+ write_nil_value: nil,
+ write_empty_value: "",
+ strip: false)
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
# create the IO object we will read from
@@ -922,8 +930,13 @@ class CSV
nil_value: nil_value,
empty_value: empty_value,
}
+ @write_fields_converter_options = {
+ nil_value: write_nil_value,
+ empty_value: write_empty_value,
+ }
@initial_converters = converters
@initial_header_converters = header_converters
+ @initial_write_converters = write_converters
@parser_options = {
column_separator: col_sep,
@@ -939,6 +952,7 @@ class CSV
encoding: @encoding,
nil_value: nil_value,
empty_value: empty_value,
+ strip: strip,
}
@parser = nil
@@ -998,7 +1012,7 @@ class CSV
# as is.
#
def converters
- fields_converter.map do |converter|
+ parser_fields_converter.map do |converter|
name = Converters.rassoc(converter)
name ? name.first : converter
end
@@ -1098,12 +1112,58 @@ class CSV
### IO and StringIO Delegation ###
extend Forwardable
- def_delegators :@io, :binmode, :binmode?, :close, :close_read, :close_write,
- :closed?, :eof, :eof?, :external_encoding, :fcntl,
- :fileno, :flock, :flush, :fsync, :internal_encoding,
- :ioctl, :isatty, :path, :pid, :pos, :pos=, :reopen,
- :seek, :stat, :string, :sync, :sync=, :tell, :to_i,
- :to_io, :truncate, :tty?
+ def_delegators :@io, :binmode, :close, :close_read, :close_write,
+ :closed?, :external_encoding, :fcntl,
+ :fileno, :flush, :fsync, :internal_encoding,
+ :isatty, :pid, :pos, :pos=, :reopen,
+ :seek, :string, :sync, :sync=, :tell,
+ :truncate, :tty?
+
+ def binmode?
+ if @io.respond_to?(:binmode?)
+ @io.binmode?
+ else
+ false
+ end
+ end
+
+ def flock(*args)
+ raise NotImplementedError unless @io.respond_to?(:flock)
+ @io.flock(*args)
+ end
+
+ def ioctl(*args)
+ raise NotImplementedError unless @io.respond_to?(:ioctl)
+ @io.ioctl(*args)
+ end
+
+ def path
+ @io.path if @io.respond_to?(:path)
+ end
+
+ def stat(*args)
+ raise NotImplementedError unless @io.respond_to?(:stat)
+ @io.stat(*args)
+ end
+
+ def to_i
+ raise NotImplementedError unless @io.respond_to?(:to_i)
+ @io.to_i
+ end
+
+ def to_io
+ @io.respond_to?(:to_io) ? @io.to_io : @io
+ end
+
+ def eof?
+ begin
+ parser_enumerator.peek
+ false
+ rescue StopIteration
+ true
+ end
+ end
+ alias_method :eof, :eof?
# Rewinds the underlying IO object and resets CSV's lineno() counter.
def rewind
@@ -1145,7 +1205,7 @@ class CSV
# converted field or the field itself.
#
def convert(name = nil, &converter)
- fields_converter.add_converter(name, &converter)
+ parser_fields_converter.add_converter(name, &converter)
end
#
@@ -1173,7 +1233,7 @@ class CSV
# The data source must be open for reading.
#
def each(&block)
- parser.parse(&block)
+ parser_enumerator.each(&block)
end
#
@@ -1204,9 +1264,8 @@ class CSV
# The data source must be open for reading.
#
def shift
- @parser_enumerator ||= parser.parse
begin
- @parser_enumerator.next
+ parser_enumerator.next
rescue StopIteration
nil
end
@@ -1299,7 +1358,7 @@ class CSV
if headers
header_fields_converter.convert(fields, nil, 0)
else
- fields_converter.convert(fields, @headers, lineno)
+ parser_fields_converter.convert(fields, @headers, lineno)
end
end
@@ -1316,20 +1375,16 @@ class CSV
end
end
- def fields_converter
- @fields_converter ||= build_fields_converter
+ def parser_fields_converter
+ @parser_fields_converter ||= build_parser_fields_converter
end
- def build_fields_converter
+ def build_parser_fields_converter
specific_options = {
builtin_converters: Converters,
}
options = @base_fields_converter_options.merge(specific_options)
- fields_converter = FieldsConverter.new(options)
- normalize_converters(@initial_converters).each do |name, converter|
- fields_converter.add_converter(name, &converter)
- end
- fields_converter
+ build_fields_converter(@initial_converters, options)
end
def header_fields_converter
@@ -1342,8 +1397,21 @@ class CSV
accept_nil: true,
}
options = @base_fields_converter_options.merge(specific_options)
+ build_fields_converter(@initial_header_converters, options)
+ end
+
+ def writer_fields_converter
+ @writer_fields_converter ||= build_writer_fields_converter
+ end
+
+ def build_writer_fields_converter
+ build_fields_converter(@initial_write_converters,
+ @write_fields_converter_options)
+ end
+
+ def build_fields_converter(initial_converters, options)
fields_converter = FieldsConverter.new(options)
- normalize_converters(@initial_header_converters).each do |name, converter|
+ normalize_converters(initial_converters).each do |name, converter|
fields_converter.add_converter(name, &converter)
end
fields_converter
@@ -1354,8 +1422,12 @@ class CSV
end
def parser_options
- @parser_options.merge(fields_converter: fields_converter,
- header_fields_converter: header_fields_converter)
+ @parser_options.merge(header_fields_converter: header_fields_converter,
+ fields_converter: parser_fields_converter)
+ end
+
+ def parser_enumerator
+ @parser_enumerator ||= parser.parse
end
def writer
@@ -1363,7 +1435,8 @@ class CSV
end
def writer_options
- @writer_options.merge(header_fields_converter: header_fields_converter)
+ @writer_options.merge(header_fields_converter: header_fields_converter,
+ fields_converter: writer_fields_converter)
end
end
diff --git a/lib/csv/csv.gemspec b/lib/csv/csv.gemspec
index f57d9efb7d..98110bc13c 100644
--- a/lib/csv/csv.gemspec
+++ b/lib/csv/csv.gemspec
@@ -25,6 +25,7 @@ Gem::Specification.new do |spec|
"lib/csv.rb",
"lib/csv/core_ext/array.rb",
"lib/csv/core_ext/string.rb",
+ "lib/csv/delete_suffix.rb",
"lib/csv/fields_converter.rb",
"lib/csv/match_p.rb",
"lib/csv/parser.rb",
diff --git a/lib/csv/delete_suffix.rb b/lib/csv/delete_suffix.rb
new file mode 100644
index 0000000000..e0b40c7aab
--- /dev/null
+++ b/lib/csv/delete_suffix.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+# This provides String#delete_suffix for Ruby 2.4.
+unless String.method_defined?(:delete_suffix)
+ class CSV
+ module DeleteSuffix
+ refine String do
+ def delete_suffix(suffix)
+ if end_with?(suffix)
+ self[0..(-(suffix.size + 1))]
+ else
+ self
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index e6cbc07461..85252203e4 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -2,10 +2,12 @@
require "strscan"
+require_relative "delete_suffix"
require_relative "match_p"
require_relative "row"
require_relative "table"
+using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
using CSV::MatchP if CSV.const_defined?(:MatchP)
class CSV
@@ -21,6 +23,15 @@ class CSV
@keeps = []
end
+ def each_line(row_separator)
+ position = pos
+ rest.each_line(row_separator) do |line|
+ position += line.bytesize
+ self.pos = position
+ yield(line)
+ end
+ end
+
def keep_start
@keeps.push(pos)
end
@@ -49,6 +60,50 @@ class CSV
read_chunk
end
+ def each_line(row_separator)
+ buffer = nil
+ input = @scanner.rest
+ position = @scanner.pos
+ offset = 0
+ n_row_separator_chars = row_separator.size
+ while true
+ input.each_line(row_separator) do |line|
+ @scanner.pos += line.bytesize
+ if buffer
+ if n_row_separator_chars == 2 and
+ buffer.end_with?(row_separator[0]) and
+ line.start_with?(row_separator[1])
+ buffer << line[0]
+ line = line[1..-1]
+ position += buffer.bytesize + offset
+ @scanner.pos = position
+ offset = 0
+ yield(buffer)
+ buffer = nil
+ next if line.empty?
+ else
+ buffer << line
+ line = buffer
+ buffer = nil
+ end
+ end
+ if line.end_with?(row_separator)
+ position += line.bytesize + offset
+ @scanner.pos = position
+ offset = 0
+ yield(line)
+ else
+ buffer = line
+ end
+ end
+ break unless read_chunk
+ input = @scanner.rest
+ position = @scanner.pos
+ offset = -buffer.bytesize if buffer
+ end
+ yield(buffer) if buffer
+ end
+
def scan(pattern)
value = @scanner.scan(pattern)
return value if @last_scanner
@@ -94,7 +149,7 @@ class CSV
start, buffer = @keeps.pop
if buffer
string = @scanner.string
- keep = string[start, string.size - start]
+ keep = string.byteslice(start, string.bytesize - start)
if keep and not keep.empty?
@inputs.unshift(StringIO.new(keep))
@last_scanner = false
@@ -103,6 +158,7 @@ class CSV
else
@scanner.pos = start
end
+ read_chunk if @scanner.eos?
end
def keep_drop
@@ -121,7 +177,7 @@ class CSV
keep = @keeps.last
keep_start = keep[0]
string = @scanner.string
- keep_data = string[keep_start, @scanner.pos - keep_start]
+ keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
if keep_data
keep_buffer = keep[1]
if keep_buffer
@@ -170,7 +226,6 @@ class CSV
@input = input
@options = options
@samples = []
- @parsed = false
prepare
end
@@ -230,9 +285,7 @@ class CSV
def parse(&block)
return to_enum(__method__) unless block_given?
- return if @parsed
-
- if @return_headers and @headers
+ if @return_headers and @headers and @raw_headers
headers = Row.new(@headers, @raw_headers, true)
if @unconverted_fields
headers = add_unconverted_fields(headers, [])
@@ -240,58 +293,25 @@ class CSV
yield headers
end
- row = []
begin
- @scanner = build_scanner
- skip_needless_lines
- start_row
- while true
- @quoted_column_value = false
- @unquoted_column_value = false
- value = parse_column_value
- if value and @field_size_limit and value.size >= @field_size_limit
- raise MalformedCSVError.new("Field size exceeded", @lineno + 1)
- end
- if parse_column_end
- row << value
- elsif parse_row_end
- if row.empty? and value.nil?
- emit_row([], &block) unless @skip_blanks
- else
- row << value
- emit_row(row, &block)
- row = []
- end
- skip_needless_lines
- start_row
- elsif @scanner.eos?
- break if row.empty? and value.nil?
- row << value
- emit_row(row, &block)
- break
- else
- if @quoted_column_value
- message = "Do not allow except col_sep_split_separator " +
- "after quoted fields"
- raise MalformedCSVError.new(message, @lineno + 1)
- elsif @unquoted_column_value and @scanner.scan(@cr_or_lf)
- message = "Unquoted fields do not allow \\r or \\n"
- raise MalformedCSVError.new(message, @lineno + 1)
- elsif @scanner.rest.start_with?(@quote_character)
- message = "Illegal quoting"
- raise MalformedCSVError.new(message, @lineno + 1)
- else
- raise MalformedCSVError.new("TODO: Meaningful message",
- @lineno + 1)
- end
- end
+ @scanner ||= build_scanner
+ if quote_character.nil?
+ parse_no_quote(&block)
+ elsif @need_robust_parsing
+ parse_quotable_robust(&block)
+ else
+ parse_quotable_loose(&block)
end
rescue InvalidEncoding
+ if @scanner
+ ignore_broken_line
+ lineno = @lineno
+ else
+ lineno = @lineno + 1
+ end
message = "Invalid byte sequence in #{@encoding}"
- raise MalformedCSVError.new(message, @lineno + 1)
+ raise MalformedCSVError.new(message, lineno)
end
-
- @parsed = true
end
def use_headers?
@@ -301,13 +321,20 @@ class CSV
private
def prepare
prepare_variable
- prepare_regexp
+ prepare_quote_character
+ prepare_backslash
+ prepare_skip_lines
+ prepare_strip
+ prepare_separators
+ prepare_quoted
+ prepare_unquoted
prepare_line
prepare_header
prepare_parser
end
def prepare_variable
+ @need_robust_parsing = false
@encoding = @options[:encoding]
liberal_parsing = @options[:liberal_parsing]
if liberal_parsing
@@ -315,11 +342,15 @@ class CSV
if liberal_parsing.is_a?(Hash)
@double_quote_outside_quote =
liberal_parsing[:double_quote_outside_quote]
+ @backslash_quote = liberal_parsing[:backslash_quote]
else
@double_quote_outside_quote = false
+ @backslash_quote = false
end
+ @need_robust_parsing = true
else
@liberal_parsing = false
+ @backslash_quote = false
end
@unconverted_fields = @options[:unconverted_fields]
@field_size_limit = @options[:field_size_limit]
@@ -328,20 +359,39 @@ class CSV
@header_fields_converter = @options[:header_fields_converter]
end
- def prepare_regexp
- @column_separator = @options[:column_separator].to_s.encode(@encoding)
- @row_separator =
- resolve_row_separator(@options[:row_separator]).encode(@encoding)
- @quote_character = @options[:quote_character].to_s.encode(@encoding)
- if @quote_character.length != 1
- raise ArgumentError, ":quote_char has to be a single character String"
+ def prepare_quote_character
+ @quote_character = @options[:quote_character]
+ if @quote_character.nil?
+ @escaped_quote_character = nil
+ @escaped_quote = nil
+ else
+ @quote_character = @quote_character.to_s.encode(@encoding)
+ if @quote_character.length != 1
+ message = ":quote_char has to be nil or a single character String"
+ raise ArgumentError, message
+ end
+ @double_quote_character = @quote_character * 2
+ @escaped_quote_character = Regexp.escape(@quote_character)
+ @escaped_quote = Regexp.new(@escaped_quote_character)
end
+ end
- escaped_column_separator = Regexp.escape(@column_separator)
- escaped_first_column_separator = Regexp.escape(@column_separator[0])
- escaped_row_separator = Regexp.escape(@row_separator)
- escaped_quote_character = Regexp.escape(@quote_character)
+ def prepare_backslash
+ return unless @backslash_quote
+ @backslash_character = "\\".encode(@encoding)
+
+ @escaped_backslash_character = Regexp.escape(@backslash_character)
+ @escaped_backslash = Regexp.new(@escaped_backslash_character)
+ if @quote_character.nil?
+ @backslash_quote_character = nil
+ else
+ @backslash_quote_character =
+ @backslash_character + @escaped_quote_character
+ end
+ end
+
+ def prepare_skip_lines
skip_lines = @options[:skip_lines]
case skip_lines
when String
@@ -356,18 +406,71 @@ class CSV
end
@skip_lines = skip_lines
end
+ end
+
+ def prepare_strip
+ @strip = @options[:strip]
+ @escaped_strip = nil
+ @strip_value = nil
+ if @strip.is_a?(String)
+ case @strip.length
+ when 0
+ raise ArgumentError, ":strip must not be an empty String"
+ when 1
+ # ok
+ else
+ raise ArgumentError, ":strip doesn't support 2 or more characters yet"
+ end
+ @strip = @strip.encode(@encoding)
+ @escaped_strip = Regexp.escape(@strip)
+ if @quote_character
+ @strip_value = Regexp.new(@escaped_strip +
+ "+".encode(@encoding))
+ end
+ @need_robust_parsing = true
+ elsif @strip
+ strip_values = " \t\r\n\f\v"
+ @escaped_strip = strip_values.encode(@encoding)
+ if @quote_character
+ @strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
+ end
+ @need_robust_parsing = true
+ end
+ end
- @column_end = Regexp.new(escaped_column_separator)
+ begin
+ StringScanner.new("x").scan("x")
+ rescue TypeError
+ @@string_scanner_scan_accept_string = false
+ else
+ @@string_scanner_scan_accept_string = true
+ end
+
+ def prepare_separators
+ @column_separator = @options[:column_separator].to_s.encode(@encoding)
+ @row_separator =
+ resolve_row_separator(@options[:row_separator]).encode(@encoding)
+
+ @escaped_column_separator = Regexp.escape(@column_separator)
+ @escaped_first_column_separator = Regexp.escape(@column_separator[0])
if @column_separator.size > 1
+ @column_end = Regexp.new(@escaped_column_separator)
@column_ends = @column_separator.each_char.collect do |char|
Regexp.new(Regexp.escape(char))
end
- @first_column_separators = Regexp.new(escaped_first_column_separator +
+ @first_column_separators = Regexp.new(@escaped_first_column_separator +
"+".encode(@encoding))
else
+ if @@string_scanner_scan_accept_string
+ @column_end = @column_separator
+ else
+ @column_end = Regexp.new(@escaped_column_separator)
+ end
@column_ends = nil
@first_column_separators = nil
end
+
+ escaped_row_separator = Regexp.escape(@row_separator)
@row_end = Regexp.new(escaped_row_separator)
if @row_separator.size > 1
@row_ends = @row_separator.each_char.collect do |char|
@@ -376,25 +479,56 @@ class CSV
else
@row_ends = nil
end
- @quotes = Regexp.new(escaped_quote_character +
- "+".encode(@encoding))
- @quoted_value = Regexp.new("[^".encode(@encoding) +
- escaped_quote_character +
- "]+".encode(@encoding))
- if @liberal_parsing
- @unquoted_value = Regexp.new("[^".encode(@encoding) +
- escaped_first_column_separator +
- "\r\n]+".encode(@encoding))
- else
- @unquoted_value = Regexp.new("[^".encode(@encoding) +
- escaped_quote_character +
- escaped_first_column_separator +
- "\r\n]+".encode(@encoding))
- end
+
+ @cr = "\r".encode(@encoding)
+ @lf = "\n".encode(@encoding)
@cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
@not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
end
+ def prepare_quoted
+ if @quote_character
+ @quotes = Regexp.new(@escaped_quote_character +
+ "+".encode(@encoding))
+ no_quoted_values = @escaped_quote_character.dup
+ if @backslash_quote
+ no_quoted_values << @escaped_backslash_character
+ end
+ @quoted_value = Regexp.new("[^".encode(@encoding) +
+ no_quoted_values +
+ "]+".encode(@encoding))
+ end
+ if @escaped_strip
+ @split_column_separator = Regexp.new(@escaped_strip +
+ "*".encode(@encoding) +
+ @escaped_column_separator +
+ @escaped_strip +
+ "*".encode(@encoding))
+ else
+ if @column_separator == " ".encode(@encoding)
+ @split_column_separator = Regexp.new(@escaped_column_separator)
+ else
+ @split_column_separator = @column_separator
+ end
+ end
+ end
+
+ def prepare_unquoted
+ return if @quote_character.nil?
+
+ no_unquoted_values = "\r\n".encode(@encoding)
+ no_unquoted_values << @escaped_first_column_separator
+ unless @liberal_parsing
+ no_unquoted_values << @escaped_quote_character
+ end
+ if @escaped_strip
+ no_unquoted_values << @escaped_strip
+ end
+ @unquoted_value = Regexp.new("[^".encode(@encoding) +
+ no_unquoted_values +
+ "]+".encode(@encoding))
+ end
+
def resolve_row_separator(separator)
if separator == :auto
cr = "\r".encode(@encoding)
@@ -514,6 +648,8 @@ class CSV
end
def may_quoted?
+ return false if @quote_character.nil?
+
if @input.is_a?(StringIO)
sample = @input.string
else
@@ -534,6 +670,10 @@ class CSV
@io.gets(*args)
end
+ def each_line(*args, &block)
+ @io.each_line(*args, &block)
+ end
+
def eof?
@io.eof?
end
@@ -548,7 +688,10 @@ class CSV
else
inputs << @input
end
- InputsScanner.new(inputs, @encoding, chunk_size: 1)
+ chunk_size = ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"
+ InputsScanner.new(inputs,
+ @encoding,
+ chunk_size: Integer(chunk_size, 10))
end
else
def build_scanner
@@ -560,8 +703,13 @@ class CSV
end
if string
unless string.valid_encoding?
- message = "Invalid byte sequence in #{@encoding}"
- raise MalformedCSVError.new(message, @lineno + 1)
+ index = string.lines(@row_separator).index do |line|
+ !line.valid_encoding?
+ end
+ if index
+ message = "Invalid byte sequence in #{@encoding}"
+ raise MalformedCSVError.new(message, @lineno + index + 1)
+ end
end
Scanner.new(string)
else
@@ -582,6 +730,7 @@ class CSV
line = @scanner.scan_all(@not_line_end) || "".encode(@encoding)
line << @row_separator if parse_row_end
if skip_line?(line)
+ @lineno += 1
@scanner.keep_drop
else
@scanner.keep_back
@@ -601,6 +750,147 @@ class CSV
end
end
+ def parse_no_quote(&block)
+ @scanner.each_line(@row_separator) do |line|
+ next if @skip_lines and skip_line?(line)
+ original_line = line
+ line = line.delete_suffix(@row_separator)
+
+ if line.empty?
+ next if @skip_blanks
+ row = []
+ else
+ line = strip_value(line)
+ row = line.split(@split_column_separator, -1)
+ n_columns = row.size
+ i = 0
+ while i < n_columns
+ row[i] = nil if row[i].empty?
+ i += 1
+ end
+ end
+ @last_line = original_line
+ emit_row(row, &block)
+ end
+ end
+
+ def parse_quotable_loose(&block)
+ @scanner.keep_start
+ @scanner.each_line(@row_separator) do |line|
+ if @skip_lines and skip_line?(line)
+ @scanner.keep_drop
+ @scanner.keep_start
+ next
+ end
+ original_line = line
+ line = line.delete_suffix(@row_separator)
+
+ if line.empty?
+ if @skip_blanks
+ @scanner.keep_drop
+ @scanner.keep_start
+ next
+ end
+ row = []
+ elsif line.include?(@cr) or line.include?(@lf)
+ @scanner.keep_back
+ @need_robust_parsing = true
+ return parse_quotable_robust(&block)
+ else
+ row = line.split(@split_column_separator, -1)
+ n_columns = row.size
+ i = 0
+ while i < n_columns
+ column = row[i]
+ if column.empty?
+ row[i] = nil
+ else
+ n_quotes = column.count(@quote_character)
+ if n_quotes.zero?
+ # no quote
+ elsif n_quotes == 2 and
+ column.start_with?(@quote_character) and
+ column.end_with?(@quote_character)
+ row[i] = column[1..-2]
+ else
+ @scanner.keep_back
+ @need_robust_parsing = true
+ return parse_quotable_robust(&block)
+ end
+ end
+ i += 1
+ end
+ end
+ @scanner.keep_drop
+ @scanner.keep_start
+ @last_line = original_line
+ emit_row(row, &block)
+ end
+ @scanner.keep_drop
+ end
+
+ def parse_quotable_robust(&block)
+ row = []
+ skip_needless_lines
+ start_row
+ while true
+ @quoted_column_value = false
+ @unquoted_column_value = false
+ @scanner.scan_all(@strip_value) if @strip_value
+ value = parse_column_value
+ if value
+ @scanner.scan_all(@strip_value) if @strip_value
+ if @field_size_limit and value.size >= @field_size_limit
+ ignore_broken_line
+ raise MalformedCSVError.new("Field size exceeded", @lineno)
+ end
+ end
+ if parse_column_end
+ row << value
+ elsif parse_row_end
+ if row.empty? and value.nil?
+ emit_row([], &block) unless @skip_blanks
+ else
+ row << value
+ emit_row(row, &block)
+ row = []
+ end
+ skip_needless_lines
+ start_row
+ elsif @scanner.eos?
+ break if row.empty? and value.nil?
+ row << value
+ emit_row(row, &block)
+ break
+ else
+ if @quoted_column_value
+ ignore_broken_line
+ message = "Any value after quoted field isn't allowed"
+ raise MalformedCSVError.new(message, @lineno)
+ elsif @unquoted_column_value and
+ (new_line = @scanner.scan(@cr_or_lf))
+ ignore_broken_line
+ message = "Unquoted fields do not allow new line " +
+ "<#{new_line.inspect}>"
+ raise MalformedCSVError.new(message, @lineno)
+ elsif @scanner.rest.start_with?(@quote_character)
+ ignore_broken_line
+ message = "Illegal quoting"
+ raise MalformedCSVError.new(message, @lineno)
+ elsif (new_line = @scanner.scan(@cr_or_lf))
+ ignore_broken_line
+ message = "New line must be <#{@row_separator.inspect}> " +
+ "not <#{new_line.inspect}>"
+ raise MalformedCSVError.new(message, @lineno)
+ else
+ ignore_broken_line
+ raise MalformedCSVError.new("TODO: Meaningful message",
+ @lineno)
+ end
+ end
+ end
+ end
+
def parse_column_value
if @liberal_parsing
quoted_value = parse_quoted_column_value
@@ -651,6 +941,7 @@ class CSV
value << sub_value
end
end
+ value.gsub!(@backslash_quote_character, @quote_character) if @backslash_quote
value
end
@@ -667,10 +958,22 @@ class CSV
while true
quoted_value = @scanner.scan_all(@quoted_value)
value << quoted_value if quoted_value
+ if @backslash_quote
+ if @scanner.scan(@escaped_backslash)
+ if @scanner.scan(@escaped_quote)
+ value << @quote_character
+ else
+ value << @backslash_character
+ end
+ next
+ end
+ end
+
quotes = @scanner.scan_all(@quotes)
unless quotes
+ ignore_broken_line
message = "Unclosed quoted field"
- raise MalformedCSVError.new(message, @lineno + 1)
+ raise MalformedCSVError.new(message, @lineno)
end
n_quotes = quotes.size
if n_quotes == 1
@@ -713,6 +1016,33 @@ class CSV
end
end
+ def strip_value(value)
+ return value unless @strip
+ return nil if value.nil?
+
+ case @strip
+ when String
+ size = value.size
+ while value.start_with?(@strip)
+ size -= 1
+ value = value[1, size]
+ end
+ while value.end_with?(@strip)
+ size -= 1
+ value = value[0, size]
+ end
+ else
+ value.strip!
+ end
+ value
+ end
+
+ def ignore_broken_line
+ @scanner.scan_all(@not_line_end)
+ @scanner.scan_all(@cr_or_lf)
+ @lineno += 1
+ end
+
def start_row
if @last_line
@last_line = nil
diff --git a/lib/csv/version.rb b/lib/csv/version.rb
index 0b4b7d1966..b2b0ad743a 100644
--- a/lib/csv/version.rb
+++ b/lib/csv/version.rb
@@ -2,5 +2,5 @@
class CSV
# The version of the installed library.
- VERSION = "3.0.4"
+ VERSION = "3.0.9"
end
diff --git a/lib/csv/writer.rb b/lib/csv/writer.rb
index 36db9d4014..8e0aab32ff 100644
--- a/lib/csv/writer.rb
+++ b/lib/csv/writer.rb
@@ -18,6 +18,7 @@ class CSV
if @options[:write_headers] and @headers
self << @headers
end
+ @fields_converter = @options[:fields_converter]
end
def <<(row)
@@ -31,6 +32,8 @@ class CSV
@headers ||= row if @use_headers
@lineno += 1
+ row = @fields_converter.convert(row, nil, lineno) if @fields_converter
+
converted_row = row.collect do |field|
quote(field)
end