summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2019-01-26 08:02:47 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2019-01-26 08:02:47 +0000
commit df7ac8afa196bb9d1836fc6d188c831b27b8e507 (patch)
tree 18ffe400c7cf54d0849ab65109bcaafa66ed1137 /lib
parent eb1d5ab5a0309e117ea2cb05915131393adc893d (diff)
merge revision(s) 66922: [Backport #15521]
Upgrade CSV to 3.0.4

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_6@66926 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib')
-rw-r--r-- lib/csv.rb 21
-rw-r--r-- lib/csv/csv.gemspec 2
-rw-r--r-- lib/csv/parser.rb 61
-rw-r--r-- lib/csv/row.rb 7
-rw-r--r-- lib/csv/table.rb 11
-rw-r--r-- lib/csv/version.rb 2
-rw-r--r-- lib/csv/writer.rb 65
7 files changed, 115 insertions, 54 deletions
diff --git a/lib/csv.rb b/lib/csv.rb
index ebdc6e5c6d..1a173c6d68 100644
--- a/lib/csv.rb
+++ b/lib/csv.rb
@@ -397,6 +397,7 @@ class CSV
# <b><tt>:force_quotes</tt></b>:: +false+
# <b><tt>:skip_lines</tt></b>:: +nil+
# <b><tt>:liberal_parsing</tt></b>:: +false+
+ # <b><tt>:quote_empty</tt></b>:: +true+
#
DEFAULT_OPTIONS = {
col_sep: ",",
@@ -412,6 +413,7 @@ class CSV
force_quotes: false,
skip_lines: nil,
liberal_parsing: false,
+ quote_empty: true,
}.freeze
#
@@ -534,7 +536,7 @@ class CSV
str.seek(0, IO::SEEK_END)
else
encoding = options[:encoding]
- str = String.new
+ str = +""
str.force_encoding(encoding) if encoding
end
csv = new(str, options) # wrap
@@ -557,11 +559,11 @@ class CSV
#
def self.generate_line(row, **options)
options = {row_sep: $INPUT_RECORD_SEPARATOR}.merge(options)
- str = String.new
+ str = +""
if options[:encoding]
str.force_encoding(options[:encoding])
- elsif field = row.find { |f| not f.nil? }
- str.force_encoding(String(field).encoding)
+ elsif field = row.find {|f| f.is_a?(String)}
+ str.force_encoding(field.encoding)
end
(new(str, options) << row).string
end
@@ -882,6 +884,7 @@ class CSV
# <b><tt>:empty_value</tt></b>:: When set an object, any values of a
# blank string field is replaced by
# the set object.
+ # <b><tt>:quote_empty</tt></b>:: TODO
#
# See CSV::DEFAULT_OPTIONS for the default settings.
#
@@ -907,7 +910,8 @@ class CSV
external_encoding: nil,
encoding: nil,
nil_value: nil,
- empty_value: "")
+ empty_value: "",
+ quote_empty: true)
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
# create the IO object we will read from
@@ -947,6 +951,7 @@ class CSV
column_separator: col_sep,
row_separator: row_sep,
quote_character: quote_char,
+ quote_empty: quote_empty,
}
@writer = nil
@@ -1178,9 +1183,8 @@ class CSV
#
def read
rows = to_a
- headers = parser.headers
- if headers
- Table.new(rows, headers: headers)
+ if parser.use_headers?
+ Table.new(rows, headers: parser.headers)
else
rows
end
@@ -1240,7 +1244,6 @@ class CSV
end
end
_headers = headers
- _headers = headers
str << " headers:" << _headers.inspect if _headers
str << ">"
begin
diff --git a/lib/csv/csv.gemspec b/lib/csv/csv.gemspec
index 0c9d265584..f57d9efb7d 100644
--- a/lib/csv/csv.gemspec
+++ b/lib/csv/csv.gemspec
@@ -38,6 +38,6 @@ Gem::Specification.new do |spec|
spec.add_development_dependency "bundler"
spec.add_development_dependency "rake"
- spec.add_development_dependency "benchmark-ips"
+ spec.add_development_dependency "benchmark_driver"
spec.add_development_dependency "simplecov"
end
diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index 2682c27ea3..e6cbc07461 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -170,6 +170,7 @@ class CSV
@input = input
@options = options
@samples = []
+ @parsed = false
prepare
end
@@ -229,6 +230,8 @@ class CSV
def parse(&block)
return to_enum(__method__) unless block_given?
+ return if @parsed
+
if @return_headers and @headers
headers = Row.new(@headers, @raw_headers, true)
if @unconverted_fields
@@ -262,10 +265,10 @@ class CSV
skip_needless_lines
start_row
elsif @scanner.eos?
- return if row.empty? and value.nil?
+ break if row.empty? and value.nil?
row << value
emit_row(row, &block)
- return
+ break
else
if @quoted_column_value
message = "Do not allow except col_sep_split_separator " +
@@ -287,6 +290,12 @@ class CSV
message = "Invalid byte sequence in #{@encoding}"
raise MalformedCSVError.new(message, @lineno + 1)
end
+
+ @parsed = true
+ end
+
+ def use_headers?
+ @use_headers
end
private
@@ -300,7 +309,18 @@ class CSV
def prepare_variable
@encoding = @options[:encoding]
- @liberal_parsing = @options[:liberal_parsing]
+ liberal_parsing = @options[:liberal_parsing]
+ if liberal_parsing
+ @liberal_parsing = true
+ if liberal_parsing.is_a?(Hash)
+ @double_quote_outside_quote =
+ liberal_parsing[:double_quote_outside_quote]
+ else
+ @double_quote_outside_quote = false
+ end
+ else
+ @liberal_parsing = false
+ end
@unconverted_fields = @options[:unconverted_fields]
@field_size_limit = @options[:field_size_limit]
@skip_blanks = @options[:skip_blanks]
@@ -318,6 +338,7 @@ class CSV
end
escaped_column_separator = Regexp.escape(@column_separator)
+ escaped_first_column_separator = Regexp.escape(@column_separator[0])
escaped_row_separator = Regexp.escape(@row_separator)
escaped_quote_character = Regexp.escape(@quote_character)
@@ -341,8 +362,11 @@ class CSV
@column_ends = @column_separator.each_char.collect do |char|
Regexp.new(Regexp.escape(char))
end
+ @first_column_separators = Regexp.new(escaped_first_column_separator +
+ "+".encode(@encoding))
else
@column_ends = nil
+ @first_column_separators = nil
end
@row_end = Regexp.new(escaped_row_separator)
if @row_separator.size > 1
@@ -359,12 +383,12 @@ class CSV
"]+".encode(@encoding))
if @liberal_parsing
@unquoted_value = Regexp.new("[^".encode(@encoding) +
- escaped_column_separator +
+ escaped_first_column_separator +
"\r\n]+".encode(@encoding))
else
@unquoted_value = Regexp.new("[^".encode(@encoding) +
escaped_quote_character +
- escaped_column_separator +
+ escaped_first_column_separator +
"\r\n]+".encode(@encoding))
end
@cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
@@ -583,6 +607,13 @@ class CSV
if quoted_value
unquoted_value = parse_unquoted_column_value
if unquoted_value
+ if @double_quote_outside_quote
+ unquoted_value = unquoted_value.gsub(@quote_character * 2,
+ @quote_character)
+ if quoted_value.empty? # %Q{""...} case
+ return @quote_character + unquoted_value
+ end
+ end
@quote_character + quoted_value + @quote_character + unquoted_value
else
quoted_value
@@ -601,7 +632,25 @@ class CSV
def parse_unquoted_column_value
value = @scanner.scan_all(@unquoted_value)
- @unquoted_column_value = true if value
+ return nil unless value
+
+ @unquoted_column_value = true
+ if @first_column_separators
+ while true
+ @scanner.keep_start
+ is_column_end = @column_ends.all? do |column_end|
+ @scanner.scan(column_end)
+ end
+ @scanner.keep_back
+ break if is_column_end
+ sub_separator = @scanner.scan_all(@first_column_separators)
+ break if sub_separator.nil?
+ value << sub_separator
+ sub_value = @scanner.scan_all(@unquoted_value)
+ break if sub_value.nil?
+ value << sub_value
+ end
+ end
value
end
diff --git a/lib/csv/row.rb b/lib/csv/row.rb
index 31eab2d0a4..c79d75cd8a 100644
--- a/lib/csv/row.rb
+++ b/lib/csv/row.rb
@@ -130,6 +130,7 @@ class CSV
alias_method :include?, :has_key?
alias_method :key?, :has_key?
alias_method :member?, :has_key?
+ alias_method :header?, :has_key?
#
# :call-seq:
@@ -286,12 +287,6 @@ class CSV
index.nil? ? nil : index + minimum_index
end
- # Returns +true+ if +name+ is a header for this row, and +false+ otherwise.
- def header?(name)
- headers.include? name
- end
- alias_method :include?, :header?
-
#
# Returns +true+ if +data+ matches a field in this row, and +false+
# otherwise.
diff --git a/lib/csv/table.rb b/lib/csv/table.rb
index b13d1ada10..71eb885de5 100644
--- a/lib/csv/table.rb
+++ b/lib/csv/table.rb
@@ -19,7 +19,7 @@ class CSV
# The optional +headers+ parameter can be set to Array of headers.
# If headers aren't set, headers are fetched from CSV::Row objects.
# Otherwise, headers() method will return headers being set in
- # headers arugument.
+ # headers argument.
#
# A CSV::Table object supports the following Array methods through
# delegation:
@@ -133,10 +133,15 @@ class CSV
#
# Returns the headers for the first row of this table (assumed to match all
- # other rows). An empty Array is returned for empty tables.
+ # other rows). The headers Array passed to CSV::Table.new is returned for
+ # empty tables.
#
def headers
- @headers.dup
+ if @table.empty?
+ @headers.dup
+ else
+ @table.first.headers
+ end
end
#
diff --git a/lib/csv/version.rb b/lib/csv/version.rb
index d6b59b3097..0b4b7d1966 100644
--- a/lib/csv/version.rb
+++ b/lib/csv/version.rb
@@ -2,5 +2,5 @@
class CSV
# The version of the installed library.
- VERSION = "3.0.2"
+ VERSION = "3.0.4"
end
diff --git a/lib/csv/writer.rb b/lib/csv/writer.rb
index 2f2ab095d7..36db9d4014 100644
--- a/lib/csv/writer.rb
+++ b/lib/csv/writer.rb
@@ -31,7 +31,10 @@ class CSV
@headers ||= row if @use_headers
@lineno += 1
- line = row.collect(&@quote).join(@column_separator) + @row_separator
+ converted_row = row.collect do |field|
+ quote(field)
+ end
+ line = converted_row.join(@column_separator) + @row_separator
if @output_encoding
line = line.encode(@output_encoding)
end
@@ -90,37 +93,16 @@ class CSV
else
@row_separator = row_separator.to_s.encode(@encoding)
end
- quote_character = @options[:quote_character]
- quote = lambda do |field|
- field = String(field)
- encoded_quote_character = quote_character.encode(field.encoding)
- encoded_quote_character +
- field.gsub(encoded_quote_character,
- encoded_quote_character * 2) +
- encoded_quote_character
- end
- if @options[:force_quotes]
- @quote = quote
- else
- quotable_pattern =
+ @quote_character = @options[:quote_character]
+ @force_quotes = @options[:force_quotes]
+ unless @force_quotes
+ @quotable_pattern =
Regexp.new("[\r\n".encode(@encoding) +
Regexp.escape(@column_separator) +
- Regexp.escape(quote_character.encode(@encoding)) +
+ Regexp.escape(@quote_character.encode(@encoding)) +
"]".encode(@encoding))
- @quote = lambda do |field|
- if field.nil? # represent +nil+ fields as empty unquoted fields
- ""
- else
- field = String(field) # Stringify fields
- # represent empty fields as empty quoted fields
- if field.empty? or quotable_pattern.match?(field)
- quote.call(field)
- else
- field # unquoted field
- end
- end
- end
end
+ @quote_empty = @options.fetch(:quote_empty, true)
end
def prepare_output
@@ -140,5 +122,32 @@ class CSV
end
end
end
+
+ def quote_field(field)
+ field = String(field)
+ encoded_quote_character = @quote_character.encode(field.encoding)
+ encoded_quote_character +
+ field.gsub(encoded_quote_character,
+ encoded_quote_character * 2) +
+ encoded_quote_character
+ end
+
+ def quote(field)
+ if @force_quotes
+ quote_field(field)
+ else
+ if field.nil? # represent +nil+ fields as empty unquoted fields
+ ""
+ else
+ field = String(field) # Stringify fields
+ # represent empty fields as empty quoted fields
+ if (@quote_empty and field.empty?) or @quotable_pattern.match?(field)
+ quote_field(field)
+ else
+ field # unquoted field
+ end
+ end
+ end
+ end
end
end