diff options
author | Hiroshi SHIBATA <hsbt@ruby-lang.org> | 2022-08-26 14:53:21 +0900 |
---|---|---|
committer | nagachika <nagachika@ruby-lang.org> | 2022-09-03 15:54:07 +0900 |
commit | cd0c2a67c482c441ac7f0a07c0f81573d6b6072f (patch) | |
tree | eef3b14c36dad87ec04eb990a022f67fb1094021 /lib | |
parent | c69fffe67d7ad9c1afc4799222e819e0617e11c0 (diff) |
Merge csv-3.2.4
Diffstat (limited to 'lib')
-rw-r--r-- | lib/csv.rb | 24 | ||||
-rw-r--r-- | lib/csv/fields_converter.rb | 5 | ||||
-rw-r--r-- | lib/csv/parser.rb | 78 | ||||
-rw-r--r-- | lib/csv/row.rb | 2 | ||||
-rw-r--r-- | lib/csv/version.rb | 2 | ||||
-rw-r--r-- | lib/csv/writer.rb | 10 |
6 files changed, 70 insertions, 51 deletions
diff --git a/lib/csv.rb b/lib/csv.rb index 31e46d91ed..20cfda4b41 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -95,14 +95,11 @@ require "stringio" require_relative "csv/fields_converter" require_relative "csv/input_record_separator" -require_relative "csv/match_p" require_relative "csv/parser" require_relative "csv/row" require_relative "csv/table" require_relative "csv/writer" -using CSV::MatchP if CSV.const_defined?(:MatchP) - # == \CSV # # === In a Hurry? @@ -866,8 +863,9 @@ class CSV # <b><tt>index</tt></b>:: The zero-based index of the field in its row. # <b><tt>line</tt></b>:: The line of the data source this row is from. # <b><tt>header</tt></b>:: The header for the column, when available. + # <b><tt>quoted?</tt></b>:: True or false, whether the original value is quoted or not. # - FieldInfo = Struct.new(:index, :line, :header) + FieldInfo = Struct.new(:index, :line, :header, :quoted?) # A Regexp used to find and convert some common Date formats. DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} | @@ -875,10 +873,9 @@ class CSV # A Regexp used to find and convert some common DateTime formats. DateTimeMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} | - \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} | - # ISO-8601 + # ISO-8601 and RFC-3339 (space instead of T) recognized by DateTime.parse \d{4}-\d{2}-\d{2} - (?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)? + (?:[T\s]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)? )\z /x # The encoding used by all converters. @@ -1893,8 +1890,19 @@ class CSV raise ArgumentError.new("Cannot parse nil as CSV") if data.nil? if data.is_a?(String) + if encoding + if encoding.is_a?(String) + data_external_encoding, data_internal_encoding = encoding.split(":", 2) + if data_internal_encoding + data = data.encode(data_internal_encoding, data_external_encoding) + else + data = data.dup.force_encoding(data_external_encoding) + end + else + data = data.dup.force_encoding(encoding) + end + end @io = StringIO.new(data) - @io.set_encoding(encoding || data.encoding) else @io = data end diff --git a/lib/csv/fields_converter.rb b/lib/csv/fields_converter.rb index b206118d99..d15977d379 100644 --- a/lib/csv/fields_converter.rb +++ b/lib/csv/fields_converter.rb @@ -44,7 +44,7 @@ class CSV @converters.empty? end - def convert(fields, headers, lineno) + def convert(fields, headers, lineno, quoted_fields) return fields unless need_convert? fields.collect.with_index do |field, index| @@ -63,7 +63,8 @@ class CSV else header = nil end - field = converter[field, FieldInfo.new(index, lineno, header)] + quoted = quoted_fields[index] + field = converter[field, FieldInfo.new(index, lineno, header, quoted)] end break unless field.is_a?(String) # short-circuit pipeline for speed end diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index 7fe5f0d3ab..afb3131cd5 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -2,15 +2,10 @@ require "strscan" -require_relative "delete_suffix" require_relative "input_record_separator" -require_relative "match_p" require_relative "row" require_relative "table" -using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix) -using CSV::MatchP if CSV.const_defined?(:MatchP) - class CSV # Note: Don't use this class directly. This is an internal class. class Parser @@ -763,9 +758,10 @@ class CSV case headers when Array @raw_headers = headers + quoted_fields = [false] * @raw_headers.size @use_headers = true when String - @raw_headers = parse_headers(headers) + @raw_headers, quoted_fields = parse_headers(headers) @use_headers = true when nil, false @raw_headers = nil @@ -775,21 +771,28 @@ class CSV @use_headers = true end if @raw_headers - @headers = adjust_headers(@raw_headers) + @headers = adjust_headers(@raw_headers, quoted_fields) else @headers = nil end end def parse_headers(row) - CSV.parse_line(row, - col_sep: @column_separator, - row_sep: @row_separator, - quote_char: @quote_character) + quoted_fields = [] + converter = lambda do |field, info| + quoted_fields << info.quoted? + field + end + headers = CSV.parse_line(row, + col_sep: @column_separator, + row_sep: @row_separator, + quote_char: @quote_character, + converters: [converter]) + [headers, quoted_fields] end - def adjust_headers(headers) - adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno) + def adjust_headers(headers, quoted_fields) + adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno, quoted_fields) adjusted_headers.each {|h| h.freeze if h.is_a? String} adjusted_headers end @@ -933,9 +936,11 @@ class CSV if line.empty? next if @skip_blanks row = [] + quoted_fields = [] else line = strip_value(line) row = line.split(@split_column_separator, -1) + quoted_fields = [false] * row.size if @max_field_size row.each do |column| validate_field_size(column) @@ -949,7 +954,7 @@ class CSV end end @last_line = original_line - emit_row(row, &block) + emit_row(row, quoted_fields, &block) end end @@ -971,25 +976,30 @@ class CSV next end row = [] + quoted_fields = [] elsif line.include?(@cr) or line.include?(@lf) @scanner.keep_back @need_robust_parsing = true return parse_quotable_robust(&block) else row = line.split(@split_column_separator, -1) + quoted_fields = [] n_columns = row.size i = 0 while i < n_columns column = row[i] if column.empty? + quoted_fields << false row[i] = nil else n_quotes = column.count(@quote_character) if n_quotes.zero? + quoted_fields << false # no quote elsif n_quotes == 2 and column.start_with?(@quote_character) and column.end_with?(@quote_character) + quoted_fields << true row[i] = column[1..-2] else @scanner.keep_back @@ -1004,13 +1014,14 @@ class CSV @scanner.keep_drop @scanner.keep_start @last_line = original_line - emit_row(row, &block) + emit_row(row, quoted_fields, &block) end @scanner.keep_drop end def parse_quotable_robust(&block) row = [] + quoted_fields = [] skip_needless_lines start_row while true @@ -1024,20 +1035,24 @@ class CSV end if parse_column_end row << value + quoted_fields << @quoted_column_value elsif parse_row_end if row.empty? and value.nil? - emit_row([], &block) unless @skip_blanks + emit_row([], [], &block) unless @skip_blanks else row << value - emit_row(row, &block) + quoted_fields << @quoted_column_value + emit_row(row, quoted_fields, &block) row = [] + quoted_fields = [] end skip_needless_lines start_row elsif @scanner.eos? break if row.empty? and value.nil? row << value - emit_row(row, &block) + quoted_fields << @quoted_column_value + emit_row(row, quoted_fields, &block) break else if @quoted_column_value @@ -1141,7 +1156,7 @@ class CSV if (n_quotes % 2).zero? quotes[0, (n_quotes - 2) / 2] else - value = quotes[0, (n_quotes - 1) / 2] + value = quotes[0, n_quotes / 2] while true quoted_value = @scanner.scan_all(@quoted_value) value << quoted_value if quoted_value @@ -1165,11 +1180,9 @@ class CSV n_quotes = quotes.size if n_quotes == 1 break - elsif (n_quotes % 2) == 1 - value << quotes[0, (n_quotes - 1) / 2] - break else value << quotes[0, n_quotes / 2] + break if (n_quotes % 2) == 1 end end value @@ -1205,18 +1218,15 @@ class CSV def strip_value(value) return value unless @strip - return nil if value.nil? + return value if value.nil? case @strip when String - size = value.size - while value.start_with?(@strip) - size -= 1 - value = value[1, size] + while value.delete_prefix!(@strip) + # do nothing end - while value.end_with?(@strip) - size -= 1 - value = value[0, size] + while value.delete_suffix!(@strip) + # do nothing end else value.strip! @@ -1239,22 +1249,22 @@ class CSV @scanner.keep_start end - def emit_row(row, &block) + def emit_row(row, quoted_fields, &block) @lineno += 1 raw_row = row if @use_headers if @headers.nil? - @headers = adjust_headers(row) + @headers = adjust_headers(row, quoted_fields) return unless @return_headers row = Row.new(@headers, row, true) else row = Row.new(@headers, - @fields_converter.convert(raw_row, @headers, @lineno)) + @fields_converter.convert(raw_row, @headers, @lineno, quoted_fields)) end else # convert fields, if needed... - row = @fields_converter.convert(raw_row, nil, @lineno) + row = @fields_converter.convert(raw_row, nil, @lineno, quoted_fields) end # inject unconverted fields and accessor, if requested... diff --git a/lib/csv/row.rb b/lib/csv/row.rb index 62e429fc6e..500adb1882 100644 --- a/lib/csv/row.rb +++ b/lib/csv/row.rb @@ -703,7 +703,7 @@ class CSV # by +index_or_header+ and +specifiers+. # # The nested objects may be instances of various classes. - # See {Dig Methods}[https://docs.ruby-lang.org/en/master/doc/dig_methods_rdoc.html]. + # See {Dig Methods}[https://docs.ruby-lang.org/en/master/dig_methods_rdoc.html]. # # Examples: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" diff --git a/lib/csv/version.rb b/lib/csv/version.rb index edab1c31e0..eaddde9a23 100644 --- a/lib/csv/version.rb +++ b/lib/csv/version.rb @@ -2,5 +2,5 @@ class CSV # The version of the installed library. - VERSION = "3.2.3" + VERSION = "3.2.4" end diff --git a/lib/csv/writer.rb b/lib/csv/writer.rb index 4a9a35c5af..030a295bc9 100644 --- a/lib/csv/writer.rb +++ b/lib/csv/writer.rb @@ -1,11 +1,8 @@ # frozen_string_literal: true require_relative "input_record_separator" -require_relative "match_p" require_relative "row" -using CSV::MatchP if CSV.const_defined?(:MatchP) - class CSV # Note: Don't use this class directly. This is an internal class. class Writer @@ -42,7 +39,10 @@ class CSV @headers ||= row if @use_headers @lineno += 1 - row = @fields_converter.convert(row, nil, lineno) if @fields_converter + if @fields_converter + quoted_fields = [false] * row.size + row = @fields_converter.convert(row, nil, lineno, quoted_fields) + end i = -1 converted_row = row.collect do |field| @@ -97,7 +97,7 @@ class CSV return unless @headers converter = @options[:header_fields_converter] - @headers = converter.convert(@headers, nil, 0) + @headers = converter.convert(@headers, nil, 0, []) @headers.each do |header| header.freeze if header.is_a?(String) end |