diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2019-01-26 08:02:47 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2019-01-26 08:02:47 +0000 |
commit | df7ac8afa196bb9d1836fc6d188c831b27b8e507 (patch) | |
tree | 18ffe400c7cf54d0849ab65109bcaafa66ed1137 /lib | |
parent | eb1d5ab5a0309e117ea2cb05915131393adc893d (diff) |
merge revision(s) 66922: [Backport #15521]
Upgrade CSV to 3.0.4
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_6@66926 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib')
-rw-r--r-- | lib/csv.rb | 21 | ||||
-rw-r--r-- | lib/csv/csv.gemspec | 2 | ||||
-rw-r--r-- | lib/csv/parser.rb | 61 | ||||
-rw-r--r-- | lib/csv/row.rb | 7 | ||||
-rw-r--r-- | lib/csv/table.rb | 11 | ||||
-rw-r--r-- | lib/csv/version.rb | 2 | ||||
-rw-r--r-- | lib/csv/writer.rb | 65 |
7 files changed, 115 insertions, 54 deletions
diff --git a/lib/csv.rb b/lib/csv.rb index ebdc6e5c6d..1a173c6d68 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -397,6 +397,7 @@ class CSV # <b><tt>:force_quotes</tt></b>:: +false+ # <b><tt>:skip_lines</tt></b>:: +nil+ # <b><tt>:liberal_parsing</tt></b>:: +false+ + # <b><tt>:quote_empty</tt></b>:: +true+ # DEFAULT_OPTIONS = { col_sep: ",", @@ -412,6 +413,7 @@ class CSV force_quotes: false, skip_lines: nil, liberal_parsing: false, + quote_empty: true, }.freeze # @@ -534,7 +536,7 @@ class CSV str.seek(0, IO::SEEK_END) else encoding = options[:encoding] - str = String.new + str = +"" str.force_encoding(encoding) if encoding end csv = new(str, options) # wrap @@ -557,11 +559,11 @@ class CSV # def self.generate_line(row, **options) options = {row_sep: $INPUT_RECORD_SEPARATOR}.merge(options) - str = String.new + str = +"" if options[:encoding] str.force_encoding(options[:encoding]) - elsif field = row.find { |f| not f.nil? } - str.force_encoding(String(field).encoding) + elsif field = row.find {|f| f.is_a?(String)} + str.force_encoding(field.encoding) end (new(str, options) << row).string end @@ -882,6 +884,7 @@ class CSV # <b><tt>:empty_value</tt></b>:: When set an object, any values of a # blank string field is replaced by # the set object. + # <b><tt>:quote_empty</tt></b>:: TODO # # See CSV::DEFAULT_OPTIONS for the default settings. # @@ -907,7 +910,8 @@ class CSV external_encoding: nil, encoding: nil, nil_value: nil, - empty_value: "") + empty_value: "", + quote_empty: true) raise ArgumentError.new("Cannot parse nil as CSV") if data.nil? # create the IO object we will read from @@ -947,6 +951,7 @@ class CSV column_separator: col_sep, row_separator: row_sep, quote_character: quote_char, + quote_empty: quote_empty, } @writer = nil @@ -1178,9 +1183,8 @@ class CSV # def read rows = to_a - headers = parser.headers - if headers - Table.new(rows, headers: headers) + if parser.use_headers? + Table.new(rows, headers: parser.headers) else rows end @@ -1240,7 +1244,6 @@ class CSV end end _headers = headers - _headers = headers str << " headers:" << _headers.inspect if _headers str << ">" begin diff --git a/lib/csv/csv.gemspec b/lib/csv/csv.gemspec index 0c9d265584..f57d9efb7d 100644 --- a/lib/csv/csv.gemspec +++ b/lib/csv/csv.gemspec @@ -38,6 +38,6 @@ Gem::Specification.new do |spec| spec.add_development_dependency "bundler" spec.add_development_dependency "rake" - spec.add_development_dependency "benchmark-ips" + spec.add_development_dependency "benchmark_driver" spec.add_development_dependency "simplecov" end diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index 2682c27ea3..e6cbc07461 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -170,6 +170,7 @@ class CSV @input = input @options = options @samples = [] + @parsed = false prepare end @@ -229,6 +230,8 @@ class CSV def parse(&block) return to_enum(__method__) unless block_given? + return if @parsed + if @return_headers and @headers headers = Row.new(@headers, @raw_headers, true) if @unconverted_fields @@ -262,10 +265,10 @@ class CSV skip_needless_lines start_row elsif @scanner.eos? - return if row.empty? and value.nil? + break if row.empty? and value.nil? row << value emit_row(row, &block) - return + break else if @quoted_column_value message = "Do not allow except col_sep_split_separator " + @@ -287,6 +290,12 @@ class CSV message = "Invalid byte sequence in #{@encoding}" raise MalformedCSVError.new(message, @lineno + 1) end + + @parsed = true + end + + def use_headers? + @use_headers end private @@ -300,7 +309,18 @@ class CSV def prepare_variable @encoding = @options[:encoding] - @liberal_parsing = @options[:liberal_parsing] + liberal_parsing = @options[:liberal_parsing] + if liberal_parsing + @liberal_parsing = true + if liberal_parsing.is_a?(Hash) + @double_quote_outside_quote = + liberal_parsing[:double_quote_outside_quote] + else + @double_quote_outside_quote = false + end + else + @liberal_parsing = false + end @unconverted_fields = @options[:unconverted_fields] @field_size_limit = @options[:field_size_limit] @skip_blanks = @options[:skip_blanks] @@ -318,6 +338,7 @@ class CSV end escaped_column_separator = Regexp.escape(@column_separator) + escaped_first_column_separator = Regexp.escape(@column_separator[0]) escaped_row_separator = Regexp.escape(@row_separator) escaped_quote_character = Regexp.escape(@quote_character) @@ -341,8 +362,11 @@ class CSV @column_ends = @column_separator.each_char.collect do |char| Regexp.new(Regexp.escape(char)) end + @first_column_separators = Regexp.new(escaped_first_column_separator + + "+".encode(@encoding)) else @column_ends = nil + @first_column_separators = nil end @row_end = Regexp.new(escaped_row_separator) if @row_separator.size > 1 @@ -359,12 +383,12 @@ class CSV "]+".encode(@encoding)) if @liberal_parsing @unquoted_value = Regexp.new("[^".encode(@encoding) + - escaped_column_separator + + escaped_first_column_separator + "\r\n]+".encode(@encoding)) else @unquoted_value = Regexp.new("[^".encode(@encoding) + escaped_quote_character + - escaped_column_separator + + escaped_first_column_separator + "\r\n]+".encode(@encoding)) end @cr_or_lf = Regexp.new("[\r\n]".encode(@encoding)) @@ -583,6 +607,13 @@ class CSV if quoted_value unquoted_value = parse_unquoted_column_value if unquoted_value + if @double_quote_outside_quote + unquoted_value = unquoted_value.gsub(@quote_character * 2, + @quote_character) + if quoted_value.empty? # %Q{""...} case + return @quote_character + unquoted_value + end + end @quote_character + quoted_value + @quote_character + unquoted_value else quoted_value @@ -601,7 +632,25 @@ class CSV def parse_unquoted_column_value value = @scanner.scan_all(@unquoted_value) - @unquoted_column_value = true if value + return nil unless value + + @unquoted_column_value = true + if @first_column_separators + while true + @scanner.keep_start + is_column_end = @column_ends.all? do |column_end| + @scanner.scan(column_end) + end + @scanner.keep_back + break if is_column_end + sub_separator = @scanner.scan_all(@first_column_separators) + break if sub_separator.nil? + value << sub_separator + sub_value = @scanner.scan_all(@unquoted_value) + break if sub_value.nil? + value << sub_value + end + end value end diff --git a/lib/csv/row.rb b/lib/csv/row.rb index 31eab2d0a4..c79d75cd8a 100644 --- a/lib/csv/row.rb +++ b/lib/csv/row.rb @@ -130,6 +130,7 @@ class CSV alias_method :include?, :has_key? alias_method :key?, :has_key? alias_method :member?, :has_key? + alias_method :header?, :has_key? # # :call-seq: @@ -286,12 +287,6 @@ class CSV index.nil? ? nil : index + minimum_index end - # Returns +true+ if +name+ is a header for this row, and +false+ otherwise. - def header?(name) - headers.include? name - end - alias_method :include?, :header? - # # Returns +true+ if +data+ matches a field in this row, and +false+ # otherwise. diff --git a/lib/csv/table.rb b/lib/csv/table.rb index b13d1ada10..71eb885de5 100644 --- a/lib/csv/table.rb +++ b/lib/csv/table.rb @@ -19,7 +19,7 @@ class CSV # The optional +headers+ parameter can be set to Array of headers. # If headers aren't set, headers are fetched from CSV::Row objects. # Otherwise, headers() method will return headers being set in - # headers arugument. + # headers argument. # # A CSV::Table object supports the following Array methods through # delegation: @@ -133,10 +133,15 @@ class CSV # # Returns the headers for the first row of this table (assumed to match all - # other rows). An empty Array is returned for empty tables. + # other rows). The headers Array passed to CSV::Table.new is returned for + # empty tables. # def headers - @headers.dup + if @table.empty? + @headers.dup + else + @table.first.headers + end end # diff --git a/lib/csv/version.rb b/lib/csv/version.rb index d6b59b3097..0b4b7d1966 100644 --- a/lib/csv/version.rb +++ b/lib/csv/version.rb @@ -2,5 +2,5 @@ class CSV # The version of the installed library. - VERSION = "3.0.2" + VERSION = "3.0.4" end diff --git a/lib/csv/writer.rb b/lib/csv/writer.rb index 2f2ab095d7..36db9d4014 100644 --- a/lib/csv/writer.rb +++ b/lib/csv/writer.rb @@ -31,7 +31,10 @@ class CSV @headers ||= row if @use_headers @lineno += 1 - line = row.collect(&@quote).join(@column_separator) + @row_separator + converted_row = row.collect do |field| + quote(field) + end + line = converted_row.join(@column_separator) + @row_separator if @output_encoding line = line.encode(@output_encoding) end @@ -90,37 +93,16 @@ class CSV else @row_separator = row_separator.to_s.encode(@encoding) end - quote_character = @options[:quote_character] - quote = lambda do |field| - field = String(field) - encoded_quote_character = quote_character.encode(field.encoding) - encoded_quote_character + - field.gsub(encoded_quote_character, - encoded_quote_character * 2) + - encoded_quote_character - end - if @options[:force_quotes] - @quote = quote - else - quotable_pattern = + @quote_character = @options[:quote_character] + @force_quotes = @options[:force_quotes] + unless @force_quotes + @quotable_pattern = Regexp.new("[\r\n".encode(@encoding) + Regexp.escape(@column_separator) + - Regexp.escape(quote_character.encode(@encoding)) + + Regexp.escape(@quote_character.encode(@encoding)) + "]".encode(@encoding)) - @quote = lambda do |field| - if field.nil? # represent +nil+ fields as empty unquoted fields - "" - else - field = String(field) # Stringify fields - # represent empty fields as empty quoted fields - if field.empty? or quotable_pattern.match?(field) - quote.call(field) - else - field # unquoted field - end - end - end end + @quote_empty = @options.fetch(:quote_empty, true) end def prepare_output @@ -140,5 +122,32 @@ class CSV end end end + + def quote_field(field) + field = String(field) + encoded_quote_character = @quote_character.encode(field.encoding) + encoded_quote_character + + field.gsub(encoded_quote_character, + encoded_quote_character * 2) + + encoded_quote_character + end + + def quote(field) + if @force_quotes + quote_field(field) + else + if field.nil? # represent +nil+ fields as empty unquoted fields + "" + else + field = String(field) # Stringify fields + # represent empty fields as empty quoted fields + if (@quote_empty and field.empty?) or @quotable_pattern.match?(field) + quote_field(field) + else + field # unquoted field + end + end + end + end end end |