diff options
Diffstat (limited to 'lib/csv')
| -rw-r--r-- | lib/csv/core_ext/array.rb | 9 | ||||
| -rw-r--r-- | lib/csv/core_ext/string.rb | 9 | ||||
| -rw-r--r-- | lib/csv/csv.gemspec | 64 | ||||
| -rw-r--r-- | lib/csv/delete_suffix.rb | 18 | ||||
| -rw-r--r-- | lib/csv/fields_converter.rb | 89 | ||||
| -rw-r--r-- | lib/csv/input_record_separator.rb | 18 | ||||
| -rw-r--r-- | lib/csv/match_p.rb | 20 | ||||
| -rw-r--r-- | lib/csv/parser.rb | 1289 | ||||
| -rw-r--r-- | lib/csv/row.rb | 757 | ||||
| -rw-r--r-- | lib/csv/table.rb | 1056 | ||||
| -rw-r--r-- | lib/csv/version.rb | 6 | ||||
| -rw-r--r-- | lib/csv/writer.rb | 210 |
12 files changed, 3545 insertions, 0 deletions
diff --git a/lib/csv/core_ext/array.rb b/lib/csv/core_ext/array.rb new file mode 100644 index 0000000000..8beb06b082 --- /dev/null +++ b/lib/csv/core_ext/array.rb @@ -0,0 +1,9 @@ +class Array # :nodoc: + # Equivalent to CSV::generate_line(self, options) + # + # ["CSV", "data"].to_csv + # #=> "CSV,data\n" + def to_csv(**options) + CSV.generate_line(self, **options) + end +end diff --git a/lib/csv/core_ext/string.rb b/lib/csv/core_ext/string.rb new file mode 100644 index 0000000000..9b1d31c2a4 --- /dev/null +++ b/lib/csv/core_ext/string.rb @@ -0,0 +1,9 @@ +class String # :nodoc: + # Equivalent to CSV::parse_line(self, options) + # + # "CSV,data".parse_csv + # #=> ["CSV", "data"] + def parse_csv(**options) + CSV.parse_line(self, **options) + end +end diff --git a/lib/csv/csv.gemspec b/lib/csv/csv.gemspec new file mode 100644 index 0000000000..11c5b0f2a6 --- /dev/null +++ b/lib/csv/csv.gemspec @@ -0,0 +1,64 @@ +# frozen_string_literal: true + +begin + require_relative "lib/csv/version" +rescue LoadError + # for Ruby core repository + require_relative "version" +end + +Gem::Specification.new do |spec| + spec.name = "csv" + spec.version = CSV::VERSION + spec.authors = ["James Edward Gray II", "Kouhei Sutou"] + spec.email = [nil, "kou@cozmixng.org"] + + spec.summary = "CSV Reading and Writing" + spec.description = "The CSV library provides a complete interface to CSV files and data. It offers tools to enable you to read and write to and from Strings or IO objects, as needed." + spec.homepage = "https://github.com/ruby/csv" + spec.licenses = ["Ruby", "BSD-2-Clause"] + + lib_path = "lib" + spec.require_paths = [lib_path] + files = [] + lib_dir = File.join(__dir__, lib_path) + if File.exist?(lib_dir) + Dir.chdir(lib_dir) do + Dir.glob("**/*.rb").each do |file| + files << "lib/#{file}" + end + end + end + doc_dir = File.join(__dir__, "doc") + if File.exist?(doc_dir) + Dir.chdir(doc_dir) do + Dir.glob("**/*.rdoc").each do |rdoc_file| + files << "doc/#{rdoc_file}" + end + end + end + spec.files = files + spec.rdoc_options.concat(["--main", "README.md"]) + rdoc_files = [ + "LICENSE.txt", + "NEWS.md", + "README.md", + ] + recipes_dir = File.join(doc_dir, "csv", "recipes") + if File.exist?(recipes_dir) + Dir.chdir(recipes_dir) do + Dir.glob("**/*.rdoc").each do |recipe_file| + rdoc_files << "doc/csv/recipes/#{recipe_file}" + end + end + end + spec.extra_rdoc_files = rdoc_files + + spec.required_ruby_version = ">= 2.5.0" + + # spec.add_dependency "stringio", ">= 0.1.3" + spec.add_development_dependency "bundler" + spec.add_development_dependency "rake" + spec.add_development_dependency "benchmark_driver" + spec.add_development_dependency "test-unit", ">= 3.4.8" +end diff --git a/lib/csv/delete_suffix.rb b/lib/csv/delete_suffix.rb new file mode 100644 index 0000000000..d457718997 --- /dev/null +++ b/lib/csv/delete_suffix.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +# This provides String#delete_suffix? for Ruby 2.4. +unless String.method_defined?(:delete_suffix) + class CSV + module DeleteSuffix + refine String do + def delete_suffix(suffix) + if end_with?(suffix) + self[0...-suffix.size] + else + self + end + end + end + end + end +end diff --git a/lib/csv/fields_converter.rb b/lib/csv/fields_converter.rb new file mode 100644 index 0000000000..d15977d379 --- /dev/null +++ b/lib/csv/fields_converter.rb @@ -0,0 +1,89 @@ +# frozen_string_literal: true + +class CSV + # Note: Don't use this class directly. This is an internal class. + class FieldsConverter + include Enumerable + # + # A CSV::FieldsConverter is a data structure for storing the + # fields converter properties to be passed as a parameter + # when parsing a new file (e.g. CSV::Parser.new(@io, parser_options)) + # + + def initialize(options={}) + @converters = [] + @nil_value = options[:nil_value] + @empty_value = options[:empty_value] + @empty_value_is_empty_string = (@empty_value == "") + @accept_nil = options[:accept_nil] + @builtin_converters_name = options[:builtin_converters_name] + @need_static_convert = need_static_convert? + end + + def add_converter(name=nil, &converter) + if name.nil? # custom converter + @converters << converter + else # named converter + combo = builtin_converters[name] + case combo + when Array # combo converter + combo.each do |sub_name| + add_converter(sub_name) + end + else # individual named converter + @converters << combo + end + end + end + + def each(&block) + @converters.each(&block) + end + + def empty? + @converters.empty? + end + + def convert(fields, headers, lineno, quoted_fields) + return fields unless need_convert? + + fields.collect.with_index do |field, index| + if field.nil? + field = @nil_value + elsif field.is_a?(String) and field.empty? + field = @empty_value unless @empty_value_is_empty_string + end + @converters.each do |converter| + break if field.nil? and @accept_nil + if converter.arity == 1 # straight field converter + field = converter[field] + else # FieldInfo converter + if headers + header = headers[index] + else + header = nil + end + quoted = quoted_fields[index] + field = converter[field, FieldInfo.new(index, lineno, header, quoted)] + end + break unless field.is_a?(String) # short-circuit pipeline for speed + end + field # final state of each field, converted or original + end + end + + private + def need_static_convert? + not (@nil_value.nil? and @empty_value_is_empty_string) + end + + def need_convert? + @need_static_convert or + (not @converters.empty?) + end + + def builtin_converters + @builtin_converters ||= ::CSV.const_get(@builtin_converters_name) + end + end +end diff --git a/lib/csv/input_record_separator.rb b/lib/csv/input_record_separator.rb new file mode 100644 index 0000000000..7a99343c0c --- /dev/null +++ b/lib/csv/input_record_separator.rb @@ -0,0 +1,18 @@ +require "English" +require "stringio" + +class CSV + module InputRecordSeparator + class << self + if RUBY_VERSION >= "3.0.0" + def value + "\n" + end + else + def value + $INPUT_RECORD_SEPARATOR + end + end + end + end +end diff --git a/lib/csv/match_p.rb b/lib/csv/match_p.rb new file mode 100644 index 0000000000..775559a3eb --- /dev/null +++ b/lib/csv/match_p.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +# This provides String#match? and Regexp#match? for Ruby 2.3. +unless String.method_defined?(:match?) + class CSV + module MatchP + refine String do + def match?(pattern) + self =~ pattern + end + end + + refine Regexp do + def match?(string) + self =~ string + end + end + end + end +end diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb new file mode 100644 index 0000000000..afb3131cd5 --- /dev/null +++ b/lib/csv/parser.rb @@ -0,0 +1,1289 @@ +# frozen_string_literal: true + +require "strscan" + +require_relative "input_record_separator" +require_relative "row" +require_relative "table" + +class CSV + # Note: Don't use this class directly. This is an internal class. + class Parser + # + # A CSV::Parser is m17n aware. The parser works in the Encoding of the IO + # or String object being read from or written to. Your data is never transcoded + # (unless you ask Ruby to transcode it for you) and will literally be parsed in + # the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the + # Encoding of your data. This is accomplished by transcoding the parser itself + # into your Encoding. + # + + # Raised when encoding is invalid. + class InvalidEncoding < StandardError + end + + # Raised when unexpected case is happen. + class UnexpectedError < StandardError + end + + # + # CSV::Scanner receives a CSV output, scans it and return the content. + # It also controls the life cycle of the object with its methods +keep_start+, + # +keep_end+, +keep_back+, +keep_drop+. + # + # Uses StringScanner (the official strscan gem). Strscan provides lexical + # scanning operations on a String. We inherit its object and take advantage + # on the methods. For more information, please visit: + # https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html + # + class Scanner < StringScanner + alias_method :scan_all, :scan + + def initialize(*args) + super + @keeps = [] + end + + def each_line(row_separator) + position = pos + rest.each_line(row_separator) do |line| + position += line.bytesize + self.pos = position + yield(line) + end + end + + def keep_start + @keeps.push(pos) + end + + def keep_end + start = @keeps.pop + string.byteslice(start, pos - start) + end + + def keep_back + self.pos = @keeps.pop + end + + def keep_drop + @keeps.pop + end + end + + # + # CSV::InputsScanner receives IO inputs, encoding and the chunk_size. + # It also controls the life cycle of the object with its methods +keep_start+, + # +keep_end+, +keep_back+, +keep_drop+. + # + # CSV::InputsScanner.scan() tries to match with pattern at the current position. + # If there's a match, the scanner advances the "scan pointer" and returns the matched string. + # Otherwise, the scanner returns nil. + # + # CSV::InputsScanner.rest() returns the "rest" of the string (i.e. everything after the scan pointer). + # If there is no more data (eos? = true), it returns "". + # + class InputsScanner + def initialize(inputs, encoding, row_separator, chunk_size: 8192) + @inputs = inputs.dup + @encoding = encoding + @row_separator = row_separator + @chunk_size = chunk_size + @last_scanner = @inputs.empty? + @keeps = [] + read_chunk + end + + def each_line(row_separator) + return enum_for(__method__, row_separator) unless block_given? + buffer = nil + input = @scanner.rest + position = @scanner.pos + offset = 0 + n_row_separator_chars = row_separator.size + # trace(__method__, :start, line, input) + while true + input.each_line(row_separator) do |line| + @scanner.pos += line.bytesize + if buffer + if n_row_separator_chars == 2 and + buffer.end_with?(row_separator[0]) and + line.start_with?(row_separator[1]) + buffer << line[0] + line = line[1..-1] + position += buffer.bytesize + offset + @scanner.pos = position + offset = 0 + yield(buffer) + buffer = nil + next if line.empty? + else + buffer << line + line = buffer + buffer = nil + end + end + if line.end_with?(row_separator) + position += line.bytesize + offset + @scanner.pos = position + offset = 0 + yield(line) + else + buffer = line + end + end + break unless read_chunk + input = @scanner.rest + position = @scanner.pos + offset = -buffer.bytesize if buffer + end + yield(buffer) if buffer + end + + def scan(pattern) + # trace(__method__, pattern, :start) + value = @scanner.scan(pattern) + # trace(__method__, pattern, :done, :last, value) if @last_scanner + return value if @last_scanner + + read_chunk if value and @scanner.eos? + # trace(__method__, pattern, :done, value) + value + end + + def scan_all(pattern) + # trace(__method__, pattern, :start) + value = @scanner.scan(pattern) + # trace(__method__, pattern, :done, :last, value) if @last_scanner + return value if @last_scanner + + return nil if value.nil? + while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern)) + # trace(__method__, pattern, :sub, sub_value) + value << sub_value + end + # trace(__method__, pattern, :done, value) + value + end + + def eos? + @scanner.eos? + end + + def keep_start + # trace(__method__, :start) + adjust_last_keep + @keeps.push([@scanner, @scanner.pos, nil]) + # trace(__method__, :done) + end + + def keep_end + # trace(__method__, :start) + scanner, start, buffer = @keeps.pop + if scanner == @scanner + keep = @scanner.string.byteslice(start, @scanner.pos - start) + else + keep = @scanner.string.byteslice(0, @scanner.pos) + end + if buffer + buffer << keep + keep = buffer + end + # trace(__method__, :done, keep) + keep + end + + def keep_back + # trace(__method__, :start) + scanner, start, buffer = @keeps.pop + if buffer + # trace(__method__, :rescan, start, buffer) + string = @scanner.string + if scanner == @scanner + keep = string.byteslice(start, string.bytesize - start) + else + keep = string + end + if keep and not keep.empty? + @inputs.unshift(StringIO.new(keep)) + @last_scanner = false + end + @scanner = StringScanner.new(buffer) + else + if @scanner != scanner + message = "scanners are different but no buffer: " + message += "#{@scanner.inspect}(#{@scanner.object_id}): " + message += "#{scanner.inspect}(#{scanner.object_id})" + raise UnexpectedError, message + end + # trace(__method__, :repos, start, buffer) + @scanner.pos = start + end + read_chunk if @scanner.eos? + end + + def keep_drop + _, _, buffer = @keeps.pop + # trace(__method__, :done, :empty) unless buffer + return unless buffer + + last_keep = @keeps.last + # trace(__method__, :done, :no_last_keep) unless last_keep + return unless last_keep + + if last_keep[2] + last_keep[2] << buffer + else + last_keep[2] = buffer + end + # trace(__method__, :done) + end + + def rest + @scanner.rest + end + + def check(pattern) + @scanner.check(pattern) + end + + private + def trace(*args) + pp([*args, @scanner, @scanner&.string, @scanner&.pos, @keeps]) + end + + def adjust_last_keep + # trace(__method__, :start) + + keep = @keeps.last + # trace(__method__, :done, :empty) if keep.nil? + return if keep.nil? + + scanner, start, buffer = keep + string = @scanner.string + if @scanner != scanner + start = 0 + end + if start == 0 and @scanner.eos? + keep_data = string + else + keep_data = string.byteslice(start, @scanner.pos - start) + end + if keep_data + if buffer + buffer << keep_data + else + keep[2] = keep_data.dup + end + end + + # trace(__method__, :done) + end + + def read_chunk + return false if @last_scanner + + adjust_last_keep + + input = @inputs.first + case input + when StringIO + string = input.read + raise InvalidEncoding unless string.valid_encoding? + # trace(__method__, :stringio, string) + @scanner = StringScanner.new(string) + @inputs.shift + @last_scanner = @inputs.empty? + true + else + chunk = input.gets(@row_separator, @chunk_size) + if chunk + raise InvalidEncoding unless chunk.valid_encoding? + # trace(__method__, :chunk, chunk) + @scanner = StringScanner.new(chunk) + if input.respond_to?(:eof?) and input.eof? + @inputs.shift + @last_scanner = @inputs.empty? + end + true + else + # trace(__method__, :no_chunk) + @scanner = StringScanner.new("".encode(@encoding)) + @inputs.shift + @last_scanner = @inputs.empty? + if @last_scanner + false + else + read_chunk + end + end + end + end + end + + def initialize(input, options) + @input = input + @options = options + @samples = [] + + prepare + end + + def column_separator + @column_separator + end + + def row_separator + @row_separator + end + + def quote_character + @quote_character + end + + def field_size_limit + @max_field_size&.succ + end + + def max_field_size + @max_field_size + end + + def skip_lines + @skip_lines + end + + def unconverted_fields? + @unconverted_fields + end + + def headers + @headers + end + + def header_row? + @use_headers and @headers.nil? + end + + def return_headers? + @return_headers + end + + def skip_blanks? + @skip_blanks + end + + def liberal_parsing? + @liberal_parsing + end + + def lineno + @lineno + end + + def line + last_line + end + + def parse(&block) + return to_enum(__method__) unless block_given? + + if @return_headers and @headers and @raw_headers + headers = Row.new(@headers, @raw_headers, true) + if @unconverted_fields + headers = add_unconverted_fields(headers, []) + end + yield headers + end + + begin + @scanner ||= build_scanner + if quote_character.nil? + parse_no_quote(&block) + elsif @need_robust_parsing + parse_quotable_robust(&block) + else + parse_quotable_loose(&block) + end + rescue InvalidEncoding + if @scanner + ignore_broken_line + lineno = @lineno + else + lineno = @lineno + 1 + end + message = "Invalid byte sequence in #{@encoding}" + raise MalformedCSVError.new(message, lineno) + rescue UnexpectedError => error + if @scanner + ignore_broken_line + lineno = @lineno + else + lineno = @lineno + 1 + end + message = "This should not be happen: #{error.message}: " + message += "Please report this to https://github.com/ruby/csv/issues" + raise MalformedCSVError.new(message, lineno) + end + end + + def use_headers? + @use_headers + end + + private + # A set of tasks to prepare the file in order to parse it + def prepare + prepare_variable + prepare_quote_character + prepare_backslash + prepare_skip_lines + prepare_strip + prepare_separators + validate_strip_and_col_sep_options + prepare_quoted + prepare_unquoted + prepare_line + prepare_header + prepare_parser + end + + def prepare_variable + @need_robust_parsing = false + @encoding = @options[:encoding] + liberal_parsing = @options[:liberal_parsing] + if liberal_parsing + @liberal_parsing = true + if liberal_parsing.is_a?(Hash) + @double_quote_outside_quote = + liberal_parsing[:double_quote_outside_quote] + @backslash_quote = liberal_parsing[:backslash_quote] + else + @double_quote_outside_quote = false + @backslash_quote = false + end + @need_robust_parsing = true + else + @liberal_parsing = false + @backslash_quote = false + end + @unconverted_fields = @options[:unconverted_fields] + @max_field_size = @options[:max_field_size] + @skip_blanks = @options[:skip_blanks] + @fields_converter = @options[:fields_converter] + @header_fields_converter = @options[:header_fields_converter] + end + + def prepare_quote_character + @quote_character = @options[:quote_character] + if @quote_character.nil? + @escaped_quote_character = nil + @escaped_quote = nil + else + @quote_character = @quote_character.to_s.encode(@encoding) + if @quote_character.length != 1 + message = ":quote_char has to be nil or a single character String" + raise ArgumentError, message + end + @double_quote_character = @quote_character * 2 + @escaped_quote_character = Regexp.escape(@quote_character) + @escaped_quote = Regexp.new(@escaped_quote_character) + end + end + + def prepare_backslash + return unless @backslash_quote + + @backslash_character = "\\".encode(@encoding) + + @escaped_backslash_character = Regexp.escape(@backslash_character) + @escaped_backslash = Regexp.new(@escaped_backslash_character) + if @quote_character.nil? + @backslash_quote_character = nil + else + @backslash_quote_character = + @backslash_character + @escaped_quote_character + end + end + + def prepare_skip_lines + skip_lines = @options[:skip_lines] + case skip_lines + when String + @skip_lines = skip_lines.encode(@encoding) + when Regexp, nil + @skip_lines = skip_lines + else + unless skip_lines.respond_to?(:match) + message = + ":skip_lines has to respond to \#match: #{skip_lines.inspect}" + raise ArgumentError, message + end + @skip_lines = skip_lines + end + end + + def prepare_strip + @strip = @options[:strip] + @escaped_strip = nil + @strip_value = nil + @rstrip_value = nil + if @strip.is_a?(String) + case @strip.length + when 0 + raise ArgumentError, ":strip must not be an empty String" + when 1 + # ok + else + raise ArgumentError, ":strip doesn't support 2 or more characters yet" + end + @strip = @strip.encode(@encoding) + @escaped_strip = Regexp.escape(@strip) + if @quote_character + @strip_value = Regexp.new(@escaped_strip + + "+".encode(@encoding)) + @rstrip_value = Regexp.new(@escaped_strip + + "+\\z".encode(@encoding)) + end + @need_robust_parsing = true + elsif @strip + strip_values = " \t\f\v" + @escaped_strip = strip_values.encode(@encoding) + if @quote_character + @strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding)) + @rstrip_value = Regexp.new("[#{strip_values}]+\\z".encode(@encoding)) + end + @need_robust_parsing = true + end + end + + begin + StringScanner.new("x").scan("x") + rescue TypeError + STRING_SCANNER_SCAN_ACCEPT_STRING = false + else + STRING_SCANNER_SCAN_ACCEPT_STRING = true + end + + def prepare_separators + column_separator = @options[:column_separator] + @column_separator = column_separator.to_s.encode(@encoding) + if @column_separator.size < 1 + message = ":col_sep must be 1 or more characters: " + message += column_separator.inspect + raise ArgumentError, message + end + @row_separator = + resolve_row_separator(@options[:row_separator]).encode(@encoding) + + @escaped_column_separator = Regexp.escape(@column_separator) + @escaped_first_column_separator = Regexp.escape(@column_separator[0]) + if @column_separator.size > 1 + @column_end = Regexp.new(@escaped_column_separator) + @column_ends = @column_separator.each_char.collect do |char| + Regexp.new(Regexp.escape(char)) + end + @first_column_separators = Regexp.new(@escaped_first_column_separator + + "+".encode(@encoding)) + else + if STRING_SCANNER_SCAN_ACCEPT_STRING + @column_end = @column_separator + else + @column_end = Regexp.new(@escaped_column_separator) + end + @column_ends = nil + @first_column_separators = nil + end + + escaped_row_separator = Regexp.escape(@row_separator) + @row_end = Regexp.new(escaped_row_separator) + if @row_separator.size > 1 + @row_ends = @row_separator.each_char.collect do |char| + Regexp.new(Regexp.escape(char)) + end + else + @row_ends = nil + end + + @cr = "\r".encode(@encoding) + @lf = "\n".encode(@encoding) + @line_end = Regexp.new("\r\n|\n|\r".encode(@encoding)) + @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding)) + end + + # This method verifies that there are no (obvious) ambiguities with the + # provided +col_sep+ and +strip+ parsing options. For example, if +col_sep+ + # and +strip+ were both equal to +\t+, then there would be no clear way to + # parse the input. + def validate_strip_and_col_sep_options + return unless @strip + + if @strip.is_a?(String) + if @column_separator.start_with?(@strip) || @column_separator.end_with?(@strip) + raise ArgumentError, + "The provided strip (#{@escaped_strip}) and " \ + "col_sep (#{@escaped_column_separator}) options are incompatible." + end + else + if Regexp.new("\\A[#{@escaped_strip}]|[#{@escaped_strip}]\\z").match?(@column_separator) + raise ArgumentError, + "The provided strip (true) and " \ + "col_sep (#{@escaped_column_separator}) options are incompatible." + end + end + end + + def prepare_quoted + if @quote_character + @quotes = Regexp.new(@escaped_quote_character + + "+".encode(@encoding)) + no_quoted_values = @escaped_quote_character.dup + if @backslash_quote + no_quoted_values << @escaped_backslash_character + end + @quoted_value = Regexp.new("[^".encode(@encoding) + + no_quoted_values + + "]+".encode(@encoding)) + end + if @escaped_strip + @split_column_separator = Regexp.new(@escaped_strip + + "*".encode(@encoding) + + @escaped_column_separator + + @escaped_strip + + "*".encode(@encoding)) + else + if @column_separator == " ".encode(@encoding) + @split_column_separator = Regexp.new(@escaped_column_separator) + else + @split_column_separator = @column_separator + end + end + end + + def prepare_unquoted + return if @quote_character.nil? + + no_unquoted_values = "\r\n".encode(@encoding) + no_unquoted_values << @escaped_first_column_separator + unless @liberal_parsing + no_unquoted_values << @escaped_quote_character + end + @unquoted_value = Regexp.new("[^".encode(@encoding) + + no_unquoted_values + + "]+".encode(@encoding)) + end + + def resolve_row_separator(separator) + if separator == :auto + cr = "\r".encode(@encoding) + lf = "\n".encode(@encoding) + if @input.is_a?(StringIO) + pos = @input.pos + separator = detect_row_separator(@input.read, cr, lf) + @input.seek(pos) + elsif @input.respond_to?(:gets) + if @input.is_a?(File) + chunk_size = 32 * 1024 + else + chunk_size = 1024 + end + begin + while separator == :auto + # + # if we run out of data, it's probably a single line + # (ensure will set default value) + # + break unless sample = @input.gets(nil, chunk_size) + + # extend sample if we're unsure of the line ending + if sample.end_with?(cr) + sample << (@input.gets(nil, 1) || "") + end + + @samples << sample + + separator = detect_row_separator(sample, cr, lf) + end + rescue IOError + # do nothing: ensure will set default + end + end + separator = InputRecordSeparator.value if separator == :auto + end + separator.to_s.encode(@encoding) + end + + def detect_row_separator(sample, cr, lf) + lf_index = sample.index(lf) + if lf_index + cr_index = sample[0, lf_index].index(cr) + else + cr_index = sample.index(cr) + end + if cr_index and lf_index + if cr_index + 1 == lf_index + cr + lf + elsif cr_index < lf_index + cr + else + lf + end + elsif cr_index + cr + elsif lf_index + lf + else + :auto + end + end + + def prepare_line + @lineno = 0 + @last_line = nil + @scanner = nil + end + + def last_line + if @scanner + @last_line ||= @scanner.keep_end + else + @last_line + end + end + + def prepare_header + @return_headers = @options[:return_headers] + + headers = @options[:headers] + case headers + when Array + @raw_headers = headers + quoted_fields = [false] * @raw_headers.size + @use_headers = true + when String + @raw_headers, quoted_fields = parse_headers(headers) + @use_headers = true + when nil, false + @raw_headers = nil + @use_headers = false + else + @raw_headers = nil + @use_headers = true + end + if @raw_headers + @headers = adjust_headers(@raw_headers, quoted_fields) + else + @headers = nil + end + end + + def parse_headers(row) + quoted_fields = [] + converter = lambda do |field, info| + quoted_fields << info.quoted? + field + end + headers = CSV.parse_line(row, + col_sep: @column_separator, + row_sep: @row_separator, + quote_char: @quote_character, + converters: [converter]) + [headers, quoted_fields] + end + + def adjust_headers(headers, quoted_fields) + adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno, quoted_fields) + adjusted_headers.each {|h| h.freeze if h.is_a? String} + adjusted_headers + end + + def prepare_parser + @may_quoted = may_quoted? + end + + def may_quoted? + return false if @quote_character.nil? + + if @input.is_a?(StringIO) + pos = @input.pos + sample = @input.read + @input.seek(pos) + else + return false if @samples.empty? + sample = @samples.first + end + sample[0, 128].index(@quote_character) + end + + class UnoptimizedStringIO # :nodoc: + def initialize(string) + @io = StringIO.new(string, "rb:#{string.encoding}") + end + + def gets(*args) + @io.gets(*args) + end + + def each_line(*args, &block) + @io.each_line(*args, &block) + end + + def eof? + @io.eof? + end + end + + SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes") + if SCANNER_TEST + SCANNER_TEST_CHUNK_SIZE_NAME = "CSV_PARSER_SCANNER_TEST_CHUNK_SIZE" + SCANNER_TEST_CHUNK_SIZE_VALUE = ENV[SCANNER_TEST_CHUNK_SIZE_NAME] + def build_scanner + inputs = @samples.collect do |sample| + UnoptimizedStringIO.new(sample) + end + if @input.is_a?(StringIO) + inputs << UnoptimizedStringIO.new(@input.read) + else + inputs << @input + end + begin + chunk_size_value = ENV[SCANNER_TEST_CHUNK_SIZE_NAME] + rescue # Ractor::IsolationError + # Ractor on Ruby 3.0 can't read ENV value. + chunk_size_value = SCANNER_TEST_CHUNK_SIZE_VALUE + end + chunk_size = Integer((chunk_size_value || "1"), 10) + InputsScanner.new(inputs, + @encoding, + @row_separator, + chunk_size: chunk_size) + end + else + def build_scanner + string = nil + if @samples.empty? and @input.is_a?(StringIO) + string = @input.read + elsif @samples.size == 1 and + @input != ARGF and + @input.respond_to?(:eof?) and + @input.eof? + string = @samples[0] + end + if string + unless string.valid_encoding? + index = string.lines(@row_separator).index do |line| + !line.valid_encoding? + end + if index + message = "Invalid byte sequence in #{@encoding}" + raise MalformedCSVError.new(message, @lineno + index + 1) + end + end + Scanner.new(string) + else + inputs = @samples.collect do |sample| + StringIO.new(sample) + end + inputs << @input + InputsScanner.new(inputs, @encoding, @row_separator) + end + end + end + + def skip_needless_lines + return unless @skip_lines + + until @scanner.eos? + @scanner.keep_start + line = @scanner.scan_all(@not_line_end) || "".encode(@encoding) + line << @row_separator if parse_row_end + if skip_line?(line) + @lineno += 1 + @scanner.keep_drop + else + @scanner.keep_back + return + end + end + end + + def skip_line?(line) + line = line.delete_suffix(@row_separator) + case @skip_lines + when String + line.include?(@skip_lines) + when Regexp + @skip_lines.match?(line) + else + @skip_lines.match(line) + end + end + + def validate_field_size(field) + return unless @max_field_size + return if field.size <= @max_field_size + ignore_broken_line + message = "Field size exceeded: #{field.size} > #{@max_field_size}" + raise MalformedCSVError.new(message, @lineno) + end + + def parse_no_quote(&block) + @scanner.each_line(@row_separator) do |line| + next if @skip_lines and skip_line?(line) + original_line = line + line = line.delete_suffix(@row_separator) + + if line.empty? + next if @skip_blanks + row = [] + quoted_fields = [] + else + line = strip_value(line) + row = line.split(@split_column_separator, -1) + quoted_fields = [false] * row.size + if @max_field_size + row.each do |column| + validate_field_size(column) + end + end + n_columns = row.size + i = 0 + while i < n_columns + row[i] = nil if row[i].empty? + i += 1 + end + end + @last_line = original_line + emit_row(row, quoted_fields, &block) + end + end + + def parse_quotable_loose(&block) + @scanner.keep_start + @scanner.each_line(@row_separator) do |line| + if @skip_lines and skip_line?(line) + @scanner.keep_drop + @scanner.keep_start + next + end + original_line = line + line = line.delete_suffix(@row_separator) + + if line.empty? + if @skip_blanks + @scanner.keep_drop + @scanner.keep_start + next + end + row = [] + quoted_fields = [] + elsif line.include?(@cr) or line.include?(@lf) + @scanner.keep_back + @need_robust_parsing = true + return parse_quotable_robust(&block) + else + row = line.split(@split_column_separator, -1) + quoted_fields = [] + n_columns = row.size + i = 0 + while i < n_columns + column = row[i] + if column.empty? + quoted_fields << false + row[i] = nil + else + n_quotes = column.count(@quote_character) + if n_quotes.zero? + quoted_fields << false + # no quote + elsif n_quotes == 2 and + column.start_with?(@quote_character) and + column.end_with?(@quote_character) + quoted_fields << true + row[i] = column[1..-2] + else + @scanner.keep_back + @need_robust_parsing = true + return parse_quotable_robust(&block) + end + validate_field_size(row[i]) + end + i += 1 + end + end + @scanner.keep_drop + @scanner.keep_start + @last_line = original_line + emit_row(row, quoted_fields, &block) + end + @scanner.keep_drop + end + + def parse_quotable_robust(&block) + row = [] + quoted_fields = [] + skip_needless_lines + start_row + while true + @quoted_column_value = false + @unquoted_column_value = false + @scanner.scan_all(@strip_value) if @strip_value + value = parse_column_value + if value + @scanner.scan_all(@strip_value) if @strip_value + validate_field_size(value) + end + if parse_column_end + row << value + quoted_fields << @quoted_column_value + elsif parse_row_end + if row.empty? and value.nil? + emit_row([], [], &block) unless @skip_blanks + else + row << value + quoted_fields << @quoted_column_value + emit_row(row, quoted_fields, &block) + row = [] + quoted_fields = [] + end + skip_needless_lines + start_row + elsif @scanner.eos? + break if row.empty? and value.nil? + row << value + quoted_fields << @quoted_column_value + emit_row(row, quoted_fields, &block) + break + else + if @quoted_column_value + if liberal_parsing? and (new_line = @scanner.check(@line_end)) + message = + "Illegal end-of-line sequence outside of a quoted field " + + "<#{new_line.inspect}>" + else + message = "Any value after quoted field isn't allowed" + end + ignore_broken_line + raise MalformedCSVError.new(message, @lineno) + elsif @unquoted_column_value and + (new_line = @scanner.scan(@line_end)) + ignore_broken_line + message = "Unquoted fields do not allow new line " + + "<#{new_line.inspect}>" + raise MalformedCSVError.new(message, @lineno) + elsif @scanner.rest.start_with?(@quote_character) + ignore_broken_line + message = "Illegal quoting" + raise MalformedCSVError.new(message, @lineno) + elsif (new_line = @scanner.scan(@line_end)) + ignore_broken_line + message = "New line must be <#{@row_separator.inspect}> " + + "not <#{new_line.inspect}>" + raise MalformedCSVError.new(message, @lineno) + else + ignore_broken_line + raise MalformedCSVError.new("TODO: Meaningful message", + @lineno) + end + end + end + end + + def parse_column_value + if @liberal_parsing + quoted_value = parse_quoted_column_value + if quoted_value + @scanner.scan_all(@strip_value) if @strip_value + unquoted_value = parse_unquoted_column_value + if unquoted_value + if @double_quote_outside_quote + unquoted_value = unquoted_value.gsub(@quote_character * 2, + @quote_character) + if quoted_value.empty? # %Q{""...} case + return @quote_character + unquoted_value + end + end + @quote_character + quoted_value + @quote_character + unquoted_value + else + quoted_value + end + else + parse_unquoted_column_value + end + elsif @may_quoted + parse_quoted_column_value || + parse_unquoted_column_value + else + parse_unquoted_column_value || + parse_quoted_column_value + end + end + + def parse_unquoted_column_value + value = @scanner.scan_all(@unquoted_value) + return nil unless value + + @unquoted_column_value = true + if @first_column_separators + while true + @scanner.keep_start + is_column_end = @column_ends.all? do |column_end| + @scanner.scan(column_end) + end + @scanner.keep_back + break if is_column_end + sub_separator = @scanner.scan_all(@first_column_separators) + break if sub_separator.nil? + value << sub_separator + sub_value = @scanner.scan_all(@unquoted_value) + break if sub_value.nil? + value << sub_value + end + end + value.gsub!(@backslash_quote_character, @quote_character) if @backslash_quote + if @rstrip_value + value.gsub!(@rstrip_value, "") + end + value + end + + def parse_quoted_column_value + quotes = @scanner.scan_all(@quotes) + return nil unless quotes + + @quoted_column_value = true + n_quotes = quotes.size + if (n_quotes % 2).zero? + quotes[0, (n_quotes - 2) / 2] + else + value = quotes[0, n_quotes / 2] + while true + quoted_value = @scanner.scan_all(@quoted_value) + value << quoted_value if quoted_value + if @backslash_quote + if @scanner.scan(@escaped_backslash) + if @scanner.scan(@escaped_quote) + value << @quote_character + else + value << @backslash_character + end + next + end + end + + quotes = @scanner.scan_all(@quotes) + unless quotes + ignore_broken_line + message = "Unclosed quoted field" + raise MalformedCSVError.new(message, @lineno) + end + n_quotes = quotes.size + if n_quotes == 1 + break + else + value << quotes[0, n_quotes / 2] + break if (n_quotes % 2) == 1 + end + end + value + end + end + + def parse_column_end + return true if @scanner.scan(@column_end) + return false unless @column_ends + + @scanner.keep_start + if @column_ends.all? {|column_end| @scanner.scan(column_end)} + @scanner.keep_drop + true + else + @scanner.keep_back + false + end + end + + def parse_row_end + return true if @scanner.scan(@row_end) + return false unless @row_ends + @scanner.keep_start + if @row_ends.all? {|row_end| @scanner.scan(row_end)} + @scanner.keep_drop + true + else + @scanner.keep_back + false + end + end + + def strip_value(value) + return value unless @strip + return value if value.nil? + + case @strip + when String + while value.delete_prefix!(@strip) + # do nothing + end + while value.delete_suffix!(@strip) + # do nothing + end + else + value.strip! + end + value + end + + def ignore_broken_line + @scanner.scan_all(@not_line_end) + @scanner.scan_all(@line_end) + @lineno += 1 + end + + def start_row + if @last_line + @last_line = nil + else + @scanner.keep_drop + end + @scanner.keep_start + end + + def emit_row(row, quoted_fields, &block) + @lineno += 1 + + raw_row = row + if @use_headers + if @headers.nil? + @headers = adjust_headers(row, quoted_fields) + return unless @return_headers + row = Row.new(@headers, row, true) + else + row = Row.new(@headers, + @fields_converter.convert(raw_row, @headers, @lineno, quoted_fields)) + end + else + # convert fields, if needed... + row = @fields_converter.convert(raw_row, nil, @lineno, quoted_fields) + end + + # inject unconverted fields and accessor, if requested... + if @unconverted_fields and not row.respond_to?(:unconverted_fields) + add_unconverted_fields(row, raw_row) + end + + yield(row) + end + + # This method injects an instance variable <tt>unconverted_fields</tt> into + # +row+ and an accessor method for +row+ called unconverted_fields(). The + # variable is set to the contents of +fields+. + def add_unconverted_fields(row, fields) + class << row + attr_reader :unconverted_fields + end + row.instance_variable_set(:@unconverted_fields, fields) + row + end + end +end diff --git a/lib/csv/row.rb b/lib/csv/row.rb new file mode 100644 index 0000000000..500adb1882 --- /dev/null +++ b/lib/csv/row.rb @@ -0,0 +1,757 @@ +# frozen_string_literal: true + +require "forwardable" + +class CSV + # = \CSV::Row + # A \CSV::Row instance represents a \CSV table row. + # (see {class CSV}[../CSV.html]). + # + # The instance may have: + # - Fields: each is an object, not necessarily a \String. + # - Headers: each serves a key, and also need not be a \String. + # + # === Instance Methods + # + # \CSV::Row has three groups of instance methods: + # - Its own internally defined instance methods. + # - Methods included by module Enumerable. + # - Methods delegated to class Array.: + # * Array#empty? + # * Array#length + # * Array#size + # + # == Creating a \CSV::Row Instance + # + # Commonly, a new \CSV::Row instance is created by parsing \CSV source + # that has headers: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.each {|row| p row } + # Output: + # #<CSV::Row "Name":"foo" "Value":"0"> + # #<CSV::Row "Name":"bar" "Value":"1"> + # #<CSV::Row "Name":"baz" "Value":"2"> + # + # You can also create a row directly. See ::new. + # + # == Headers + # + # Like a \CSV::Table, a \CSV::Row has headers. + # + # A \CSV::Row that was created by parsing \CSV source + # inherits its headers from the table: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # row = table.first + # row.headers # => ["Name", "Value"] + # + # You can also create a new row with headers; + # like the keys in a \Hash, the headers need not be Strings: + # row = CSV::Row.new([:name, :value], ['foo', 0]) + # row.headers # => [:name, :value] + # + # The new row retains its headers even if added to a table + # that has headers: + # table << row # => #<CSV::Table mode:col_or_row row_count:5> + # row.headers # => [:name, :value] + # row[:name] # => "foo" + # row['Name'] # => nil + # + # + # + # == Accessing Fields + # + # You may access a field in a \CSV::Row with either its \Integer index + # (\Array-style) or its header (\Hash-style). + # + # Fetch a field using method #[]: + # row = CSV::Row.new(['Name', 'Value'], ['foo', 0]) + # row[1] # => 0 + # row['Value'] # => 0 + # + # Set a field using method #[]=: + # row = CSV::Row.new(['Name', 'Value'], ['foo', 0]) + # row # => #<CSV::Row "Name":"foo" "Value":0> + # row[0] = 'bar' + # row['Value'] = 1 + # row # => #<CSV::Row "Name":"bar" "Value":1> + # + class Row + # :call-seq: + # CSV::Row.new(headers, fields, header_row = false) -> csv_row + # + # Returns the new \CSV::Row instance constructed from + # arguments +headers+ and +fields+; both should be Arrays; + # note that the fields need not be Strings: + # row = CSV::Row.new(['Name', 'Value'], ['foo', 0]) + # row # => #<CSV::Row "Name":"foo" "Value":0> + # + # If the \Array lengths are different, the shorter is +nil+-filled: + # row = CSV::Row.new(['Name', 'Value', 'Date', 'Size'], ['foo', 0]) + # row # => #<CSV::Row "Name":"foo" "Value":0 "Date":nil "Size":nil> + # + # Each \CSV::Row object is either a <i>field row</i> or a <i>header row</i>; + # by default, a new row is a field row; for the row created above: + # row.field_row? # => true + # row.header_row? # => false + # + # If the optional argument +header_row+ is given as +true+, + # the created row is a header row: + # row = CSV::Row.new(['Name', 'Value'], ['foo', 0], header_row = true) + # row # => #<CSV::Row "Name":"foo" "Value":0> + # row.field_row? # => false + # row.header_row? # => true + def initialize(headers, fields, header_row = false) + @header_row = header_row + headers.each { |h| h.freeze if h.is_a? String } + + # handle extra headers or fields + @row = if headers.size >= fields.size + headers.zip(fields) + else + fields.zip(headers).each(&:reverse!) + end + end + + # Internal data format used to compare equality. + attr_reader :row + protected :row + + ### Array Delegation ### + + extend Forwardable + def_delegators :@row, :empty?, :length, :size + + # :call-seq: + # row.initialize_copy(other_row) -> self + # + # Calls superclass method. + def initialize_copy(other) + super_return_value = super + @row = @row.collect(&:dup) + super_return_value + end + + # :call-seq: + # row.header_row? -> true or false + # + # Returns +true+ if this is a header row, +false+ otherwise. + def header_row? + @header_row + end + + # :call-seq: + # row.field_row? -> true or false + # + # Returns +true+ if this is a field row, +false+ otherwise. + def field_row? + not header_row? + end + + # :call-seq: + # row.headers -> array_of_headers + # + # Returns the headers for this row: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # row = table.first + # row.headers # => ["Name", "Value"] + def headers + @row.map(&:first) + end + + # :call-seq: + # field(index) -> value + # field(header) -> value + # field(header, offset) -> value + # + # Returns the field value for the given +index+ or +header+. + # + # --- + # + # Fetch field value by \Integer index: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.field(0) # => "foo" + # row.field(1) # => "bar" + # + # Counts backward from the last column if +index+ is negative: + # row.field(-1) # => "0" + # row.field(-2) # => "foo" + # + # Returns +nil+ if +index+ is out of range: + # row.field(2) # => nil + # row.field(-3) # => nil + # + # --- + # + # Fetch field value by header (first found): + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.field('Name') # => "Foo" + # + # Fetch field value by header, ignoring +offset+ leading fields: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.field('Name', 2) # => "Baz" + # + # Returns +nil+ if the header does not exist. + def field(header_or_index, minimum_index = 0) + # locate the pair + finder = (header_or_index.is_a?(Integer) || header_or_index.is_a?(Range)) ? :[] : :assoc + pair = @row[minimum_index..-1].public_send(finder, header_or_index) + + # return the field if we have a pair + if pair.nil? + nil + else + header_or_index.is_a?(Range) ? pair.map(&:last) : pair.last + end + end + alias_method :[], :field + + # + # :call-seq: + # fetch(header) -> value + # fetch(header, default) -> value + # fetch(header) {|row| ... } -> value + # + # Returns the field value as specified by +header+. + # + # --- + # + # With the single argument +header+, returns the field value + # for that header (first found): + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.fetch('Name') # => "Foo" + # + # Raises exception +KeyError+ if the header does not exist. + # + # --- + # + # With arguments +header+ and +default+ given, + # returns the field value for the header (first found) + # if the header exists, otherwise returns +default+: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.fetch('Name', '') # => "Foo" + # row.fetch(:nosuch, '') # => "" + # + # --- + # + # With argument +header+ and a block given, + # returns the field value for the header (first found) + # if the header exists; otherwise calls the block + # and returns its return value: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.fetch('Name') {|header| fail 'Cannot happen' } # => "Foo" + # row.fetch(:nosuch) {|header| "Header '#{header} not found'" } # => "Header 'nosuch not found'" + def fetch(header, *varargs) + raise ArgumentError, "Too many arguments" if varargs.length > 1 + pair = @row.assoc(header) + if pair + pair.last + else + if block_given? + yield header + elsif varargs.empty? + raise KeyError, "key not found: #{header}" + else + varargs.first + end + end + end + + # :call-seq: + # row.has_key?(header) -> true or false + # + # Returns +true+ if there is a field with the given +header+, + # +false+ otherwise. + def has_key?(header) + !!@row.assoc(header) + end + alias_method :include?, :has_key? + alias_method :key?, :has_key? + alias_method :member?, :has_key? + alias_method :header?, :has_key? + + # + # :call-seq: + # row[index] = value -> value + # row[header, offset] = value -> value + # row[header] = value -> value + # + # Assigns the field value for the given +index+ or +header+; + # returns +value+. + # + # --- + # + # Assign field value by \Integer index: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row[0] = 'Bat' + # row[1] = 3 + # row # => #<CSV::Row "Name":"Bat" "Value":3> + # + # Counts backward from the last column if +index+ is negative: + # row[-1] = 4 + # row[-2] = 'Bam' + # row # => #<CSV::Row "Name":"Bam" "Value":4> + # + # Extends the row with <tt>nil:nil</tt> if positive +index+ is not in the row: + # row[4] = 5 + # row # => #<CSV::Row "Name":"bad" "Value":4 nil:nil nil:nil nil:5> + # + # Raises IndexError if negative +index+ is too small (too far from zero). + # + # --- + # + # Assign field value by header (first found): + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row['Name'] = 'Bat' + # row # => #<CSV::Row "Name":"Bat" "Name":"Bar" "Name":"Baz"> + # + # Assign field value by header, ignoring +offset+ leading fields: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row['Name', 2] = 4 + # row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":4> + # + # Append new field by (new) header: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row['New'] = 6 + # row# => #<CSV::Row "Name":"foo" "Value":"0" "New":6> + def []=(*args) + value = args.pop + + if args.first.is_a? Integer + if @row[args.first].nil? # extending past the end with index + @row[args.first] = [nil, value] + @row.map! { |pair| pair.nil? ? [nil, nil] : pair } + else # normal index assignment + @row[args.first][1] = value + end + else + index = index(*args) + if index.nil? # appending a field + self << [args.first, value] + else # normal header assignment + @row[index][1] = value + end + end + end + + # + # :call-seq: + # row << [header, value] -> self + # row << hash -> self + # row << value -> self + # + # Adds a field to +self+; returns +self+: + # + # If the argument is a 2-element \Array <tt>[header, value]</tt>, + # a field is added with the given +header+ and +value+: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row << ['NAME', 'Bat'] + # row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz" "NAME":"Bat"> + # + # If the argument is a \Hash, each <tt>key-value</tt> pair is added + # as a field with header +key+ and value +value+. + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row << {NAME: 'Bat', name: 'Bam'} + # row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz" NAME:"Bat" name:"Bam"> + # + # Otherwise, the given +value+ is added as a field with no header. + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row << 'Bag' + # row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz" nil:"Bag"> + def <<(arg) + if arg.is_a?(Array) and arg.size == 2 # appending a header and name + @row << arg + elsif arg.is_a?(Hash) # append header and name pairs + arg.each { |pair| @row << pair } + else # append field value + @row << [nil, arg] + end + + self # for chaining + end + + # :call-seq: + # row.push(*values) -> self + # + # Appends each of the given +values+ to +self+ as a field; returns +self+: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.push('Bat', 'Bam') + # row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz" nil:"Bat" nil:"Bam"> + def push(*args) + args.each { |arg| self << arg } + + self # for chaining + end + + # + # :call-seq: + # delete(index) -> [header, value] or nil + # delete(header) -> [header, value] or empty_array + # delete(header, offset) -> [header, value] or empty_array + # + # Removes a specified field from +self+; returns the 2-element \Array + # <tt>[header, value]</tt> if the field exists. + # + # If an \Integer argument +index+ is given, + # removes and returns the field at offset +index+, + # or returns +nil+ if the field does not exist: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.delete(1) # => ["Name", "Bar"] + # row.delete(50) # => nil + # + # Otherwise, if the single argument +header+ is given, + # removes and returns the first-found field with the given header, + # of returns a new empty \Array if the field does not exist: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.delete('Name') # => ["Name", "Foo"] + # row.delete('NAME') # => [] + # + # If argument +header+ and \Integer argument +offset+ are given, + # removes and returns the first-found field with the given header + # whose +index+ is at least as large as +offset+: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.delete('Name', 1) # => ["Name", "Bar"] + # row.delete('NAME', 1) # => [] + def delete(header_or_index, minimum_index = 0) + if header_or_index.is_a? Integer # by index + @row.delete_at(header_or_index) + elsif i = index(header_or_index, minimum_index) # by header + @row.delete_at(i) + else + [ ] + end + end + + # :call-seq: + # row.delete_if {|header, value| ... } -> self + # + # Removes fields from +self+ as selected by the block; returns +self+. + # + # Removes each field for which the block returns a truthy value: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.delete_if {|header, value| value.start_with?('B') } # => true + # row # => #<CSV::Row "Name":"Foo"> + # row.delete_if {|header, value| header.start_with?('B') } # => false + # + # If no block is given, returns a new Enumerator: + # row.delete_if # => #<Enumerator: #<CSV::Row "Name":"Foo">:delete_if> + def delete_if(&block) + return enum_for(__method__) { size } unless block_given? + + @row.delete_if(&block) + + self # for chaining + end + + # :call-seq: + # self.fields(*specifiers) -> array_of_fields + # + # Returns field values per the given +specifiers+, which may be any mixture of: + # - \Integer index. + # - \Range of \Integer indexes. + # - 2-element \Array containing a header and offset. + # - Header. + # - \Range of headers. + # + # For +specifier+ in one of the first four cases above, + # returns the result of <tt>self.field(specifier)</tt>; see #field. + # + # Although there may be any number of +specifiers+, + # the examples here will illustrate one at a time. + # + # When the specifier is an \Integer +index+, + # returns <tt>self.field(index)</tt>L + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.fields(1) # => ["Bar"] + # + # When the specifier is a \Range of \Integers +range+, + # returns <tt>self.field(range)</tt>: + # row.fields(1..2) # => ["Bar", "Baz"] + # + # When the specifier is a 2-element \Array +array+, + # returns <tt>self.field(array)</tt>L + # row.fields('Name', 1) # => ["Foo", "Bar"] + # + # When the specifier is a header +header+, + # returns <tt>self.field(header)</tt>L + # row.fields('Name') # => ["Foo"] + # + # When the specifier is a \Range of headers +range+, + # forms a new \Range +new_range+ from the indexes of + # <tt>range.start</tt> and <tt>range.end</tt>, + # and returns <tt>self.field(new_range)</tt>: + # source = "Name,NAME,name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.fields('Name'..'NAME') # => ["Foo", "Bar"] + # + # Returns all fields if no argument given: + # row.fields # => ["Foo", "Bar", "Baz"] + def fields(*headers_and_or_indices) + if headers_and_or_indices.empty? # return all fields--no arguments + @row.map(&:last) + else # or work like values_at() + all = [] + headers_and_or_indices.each do |h_or_i| + if h_or_i.is_a? Range + index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin : + index(h_or_i.begin) + index_end = h_or_i.end.is_a?(Integer) ? h_or_i.end : + index(h_or_i.end) + new_range = h_or_i.exclude_end? ? (index_begin...index_end) : + (index_begin..index_end) + all.concat(fields.values_at(new_range)) + else + all << field(*Array(h_or_i)) + end + end + return all + end + end + alias_method :values_at, :fields + + # :call-seq: + # index(header) -> index + # index(header, offset) -> index + # + # Returns the index for the given header, if it exists; + # otherwise returns +nil+. + # + # With the single argument +header+, returns the index + # of the first-found field with the given +header+: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.index('Name') # => 0 + # row.index('NAME') # => nil + # + # With arguments +header+ and +offset+, + # returns the index of the first-found field with given +header+, + # but ignoring the first +offset+ fields: + # row.index('Name', 1) # => 1 + # row.index('Name', 3) # => nil + def index(header, minimum_index = 0) + # find the pair + index = headers[minimum_index..-1].index(header) + # return the index at the right offset, if we found one + index.nil? ? nil : index + minimum_index + end + + # :call-seq: + # row.field?(value) -> true or false + # + # Returns +true+ if +value+ is a field in this row, +false+ otherwise: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.field?('Bar') # => true + # row.field?('BAR') # => false + def field?(data) + fields.include? data + end + + include Enumerable + + # :call-seq: + # row.each {|header, value| ... } -> self + # + # Calls the block with each header-value pair; returns +self+: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.each {|header, value| p [header, value] } + # Output: + # ["Name", "Foo"] + # ["Name", "Bar"] + # ["Name", "Baz"] + # + # If no block is given, returns a new Enumerator: + # row.each # => #<Enumerator: #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz">:each> + def each(&block) + return enum_for(__method__) { size } unless block_given? + + @row.each(&block) + + self # for chaining + end + + alias_method :each_pair, :each + + # :call-seq: + # row == other -> true or false + # + # Returns +true+ if +other+ is a /CSV::Row that has the same + # fields (headers and values) in the same order as +self+; + # otherwise returns +false+: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # other_row = table[0] + # row == other_row # => true + # other_row = table[1] + # row == other_row # => false + def ==(other) + return @row == other.row if other.is_a? CSV::Row + @row == other + end + + # :call-seq: + # row.to_h -> hash + # + # Returns the new \Hash formed by adding each header-value pair in +self+ + # as a key-value pair in the \Hash. + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.to_h # => {"Name"=>"foo", "Value"=>"0"} + # + # Header order is preserved, but repeated headers are ignored: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.to_h # => {"Name"=>"Foo"} + def to_h + hash = {} + each do |key, _value| + hash[key] = self[key] unless hash.key?(key) + end + hash + end + alias_method :to_hash, :to_h + + # :call-seq: + # row.deconstruct_keys(keys) -> hash + # + # Returns the new \Hash suitable for pattern matching containing only the + # keys specified as an argument. + def deconstruct_keys(keys) + if keys.nil? + to_h + else + keys.to_h { |key| [key, self[key]] } + end + end + + alias_method :to_ary, :to_a + + # :call-seq: + # row.deconstruct -> array + # + # Returns the new \Array suitable for pattern matching containing the values + # of the row. + def deconstruct + fields + end + + # :call-seq: + # row.to_csv -> csv_string + # + # Returns the row as a \CSV String. Headers are not included: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.to_csv # => "foo,0\n" + def to_csv(**options) + fields.to_csv(**options) + end + alias_method :to_s, :to_csv + + # :call-seq: + # row.dig(index_or_header, *identifiers) -> object + # + # Finds and returns the object in nested object that is specified + # by +index_or_header+ and +specifiers+. + # + # The nested objects may be instances of various classes. + # See {Dig Methods}[https://docs.ruby-lang.org/en/master/dig_methods_rdoc.html]. + # + # Examples: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.dig(1) # => "0" + # row.dig('Value') # => "0" + # row.dig(5) # => nil + def dig(index_or_header, *indexes) + value = field(index_or_header) + if value.nil? + nil + elsif indexes.empty? + value + else + unless value.respond_to?(:dig) + raise TypeError, "#{value.class} does not have \#dig method" + end + value.dig(*indexes) + end + end + + # :call-seq: + # row.inspect -> string + # + # Returns an ASCII-compatible \String showing: + # - Class \CSV::Row. + # - Header-value pairs. + # Example: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.inspect # => "#<CSV::Row \"Name\":\"foo\" \"Value\":\"0\">" + def inspect + str = ["#<", self.class.to_s] + each do |header, field| + str << " " << (header.is_a?(Symbol) ? header.to_s : header.inspect) << + ":" << field.inspect + end + str << ">" + begin + str.join('') + rescue # any encoding error + str.map do |s| + e = Encoding::Converter.asciicompat_encoding(s.encoding) + e ? s.encode(e) : s.force_encoding("ASCII-8BIT") + end.join('') + end + end + end +end diff --git a/lib/csv/table.rb b/lib/csv/table.rb new file mode 100644 index 0000000000..90af50869d --- /dev/null +++ b/lib/csv/table.rb @@ -0,0 +1,1056 @@ +# frozen_string_literal: true + +require "forwardable" + +class CSV + # = \CSV::Table + # A \CSV::Table instance represents \CSV data. + # (see {class CSV}[../CSV.html]). + # + # The instance may have: + # - Rows: each is a Table::Row object. + # - Headers: names for the columns. + # + # === Instance Methods + # + # \CSV::Table has three groups of instance methods: + # - Its own internally defined instance methods. + # - Methods included by module Enumerable. + # - Methods delegated to class Array.: + # * Array#empty? + # * Array#length + # * Array#size + # + # == Creating a \CSV::Table Instance + # + # Commonly, a new \CSV::Table instance is created by parsing \CSV source + # using headers: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.class # => CSV::Table + # + # You can also create an instance directly. See ::new. + # + # == Headers + # + # If a table has headers, the headers serve as labels for the columns of data. + # Each header serves as the label for its column. + # + # The headers for a \CSV::Table object are stored as an \Array of Strings. + # + # Commonly, headers are defined in the first row of \CSV source: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.headers # => ["Name", "Value"] + # + # If no headers are defined, the \Array is empty: + # table = CSV::Table.new([]) + # table.headers # => [] + # + # == Access Modes + # + # \CSV::Table provides three modes for accessing table data: + # - \Row mode. + # - Column mode. + # - Mixed mode (the default for a new table). + # + # The access mode for a\CSV::Table instance affects the behavior + # of some of its instance methods: + # - #[] + # - #[]= + # - #delete + # - #delete_if + # - #each + # - #values_at + # + # === \Row Mode + # + # Set a table to row mode with method #by_row!: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_row! # => #<CSV::Table mode:row row_count:4> + # + # Specify a single row by an \Integer index: + # # Get a row. + # table[1] # => #<CSV::Row "Name":"bar" "Value":"1"> + # # Set a row, then get it. + # table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3]) + # table[1] # => #<CSV::Row "Name":"bam" "Value":3> + # + # Specify a sequence of rows by a \Range: + # # Get rows. + # table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">] + # # Set rows, then get them. + # table[1..2] = [ + # CSV::Row.new(['Name', 'Value'], ['bat', 4]), + # CSV::Row.new(['Name', 'Value'], ['bad', 5]), + # ] + # table[1..2] # => [["Name", #<CSV::Row "Name":"bat" "Value":4>], ["Value", #<CSV::Row "Name":"bad" "Value":5>]] + # + # === Column Mode + # + # Set a table to column mode with method #by_col!: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_col! # => #<CSV::Table mode:col row_count:4> + # + # Specify a column by an \Integer index: + # # Get a column. + # table[0] + # # Set a column, then get it. + # table[0] = ['FOO', 'BAR', 'BAZ'] + # table[0] # => ["FOO", "BAR", "BAZ"] + # + # Specify a column by its \String header: + # # Get a column. + # table['Name'] # => ["FOO", "BAR", "BAZ"] + # # Set a column, then get it. + # table['Name'] = ['Foo', 'Bar', 'Baz'] + # table['Name'] # => ["Foo", "Bar", "Baz"] + # + # === Mixed Mode + # + # In mixed mode, you can refer to either rows or columns: + # - An \Integer index refers to a row. + # - A \Range index refers to multiple rows. + # - A \String index refers to a column. + # + # Set a table to mixed mode with method #by_col_or_row!: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4> + # + # Specify a single row by an \Integer index: + # # Get a row. + # table[1] # => #<CSV::Row "Name":"bar" "Value":"1"> + # # Set a row, then get it. + # table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3]) + # table[1] # => #<CSV::Row "Name":"bam" "Value":3> + # + # Specify a sequence of rows by a \Range: + # # Get rows. + # table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">] + # # Set rows, then get them. + # table[1] = CSV::Row.new(['Name', 'Value'], ['bat', 4]) + # table[2] = CSV::Row.new(['Name', 'Value'], ['bad', 5]) + # table[1..2] # => [["Name", #<CSV::Row "Name":"bat" "Value":4>], ["Value", #<CSV::Row "Name":"bad" "Value":5>]] + # + # Specify a column by its \String header: + # # Get a column. + # table['Name'] # => ["foo", "bat", "bad"] + # # Set a column, then get it. + # table['Name'] = ['Foo', 'Bar', 'Baz'] + # table['Name'] # => ["Foo", "Bar", "Baz"] + class Table + # :call-seq: + # CSV::Table.new(array_of_rows, headers = nil) -> csv_table + # + # Returns a new \CSV::Table object. + # + # - Argument +array_of_rows+ must be an \Array of CSV::Row objects. + # - Argument +headers+, if given, may be an \Array of Strings. + # + # --- + # + # Create an empty \CSV::Table object: + # table = CSV::Table.new([]) + # table # => #<CSV::Table mode:col_or_row row_count:1> + # + # Create a non-empty \CSV::Table object: + # rows = [ + # CSV::Row.new([], []), + # CSV::Row.new([], []), + # CSV::Row.new([], []), + # ] + # table = CSV::Table.new(rows) + # table # => #<CSV::Table mode:col_or_row row_count:4> + # + # --- + # + # If argument +headers+ is an \Array of Strings, + # those Strings become the table's headers: + # table = CSV::Table.new([], headers: ['Name', 'Age']) + # table.headers # => ["Name", "Age"] + # + # If argument +headers+ is not given and the table has rows, + # the headers are taken from the first row: + # rows = [ + # CSV::Row.new(['Foo', 'Bar'], []), + # CSV::Row.new(['foo', 'bar'], []), + # CSV::Row.new(['FOO', 'BAR'], []), + # ] + # table = CSV::Table.new(rows) + # table.headers # => ["Foo", "Bar"] + # + # If argument +headers+ is not given and the table is empty (has no rows), + # the headers are also empty: + # table = CSV::Table.new([]) + # table.headers # => [] + # + # --- + # + # Raises an exception if argument +array_of_rows+ is not an \Array object: + # # Raises NoMethodError (undefined method `first' for :foo:Symbol): + # CSV::Table.new(:foo) + # + # Raises an exception if an element of +array_of_rows+ is not a \CSV::Table object: + # # Raises NoMethodError (undefined method `headers' for :foo:Symbol): + # CSV::Table.new([:foo]) + def initialize(array_of_rows, headers: nil) + @table = array_of_rows + @headers = headers + unless @headers + if @table.empty? + @headers = [] + else + @headers = @table.first.headers + end + end + + @mode = :col_or_row + end + + # The current access mode for indexing and iteration. + attr_reader :mode + + # Internal data format used to compare equality. + attr_reader :table + protected :table + + ### Array Delegation ### + + extend Forwardable + def_delegators :@table, :empty?, :length, :size + + # :call-seq: + # table.by_col -> table_dup + # + # Returns a duplicate of +self+, in column mode + # (see {Column Mode}[#class-CSV::Table-label-Column+Mode]): + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.mode # => :col_or_row + # dup_table = table.by_col + # dup_table.mode # => :col + # dup_table.equal?(table) # => false # It's a dup + # + # This may be used to chain method calls without changing the mode + # (but also will affect performance and memory usage): + # dup_table.by_col['Name'] + # + # Also note that changes to the duplicate table will not affect the original. + def by_col + self.class.new(@table.dup).by_col! + end + + # :call-seq: + # table.by_col! -> self + # + # Sets the mode for +self+ to column mode + # (see {Column Mode}[#class-CSV::Table-label-Column+Mode]); returns +self+: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.mode # => :col_or_row + # table1 = table.by_col! + # table.mode # => :col + # table1.equal?(table) # => true # Returned self + def by_col! + @mode = :col + + self + end + + # :call-seq: + # table.by_col_or_row -> table_dup + # + # Returns a duplicate of +self+, in mixed mode + # (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]): + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true).by_col! + # table.mode # => :col + # dup_table = table.by_col_or_row + # dup_table.mode # => :col_or_row + # dup_table.equal?(table) # => false # It's a dup + # + # This may be used to chain method calls without changing the mode + # (but also will affect performance and memory usage): + # dup_table.by_col_or_row['Name'] + # + # Also note that changes to the duplicate table will not affect the original. + def by_col_or_row + self.class.new(@table.dup).by_col_or_row! + end + + # :call-seq: + # table.by_col_or_row! -> self + # + # Sets the mode for +self+ to mixed mode + # (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]); returns +self+: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true).by_col! + # table.mode # => :col + # table1 = table.by_col_or_row! + # table.mode # => :col_or_row + # table1.equal?(table) # => true # Returned self + def by_col_or_row! + @mode = :col_or_row + + self + end + + # :call-seq: + # table.by_row -> table_dup + # + # Returns a duplicate of +self+, in row mode + # (see {Row Mode}[#class-CSV::Table-label-Row+Mode]): + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.mode # => :col_or_row + # dup_table = table.by_row + # dup_table.mode # => :row + # dup_table.equal?(table) # => false # It's a dup + # + # This may be used to chain method calls without changing the mode + # (but also will affect performance and memory usage): + # dup_table.by_row[1] + # + # Also note that changes to the duplicate table will not affect the original. + def by_row + self.class.new(@table.dup).by_row! + end + + # :call-seq: + # table.by_row! -> self + # + # Sets the mode for +self+ to row mode + # (see {Row Mode}[#class-CSV::Table-label-Row+Mode]); returns +self+: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.mode # => :col_or_row + # table1 = table.by_row! + # table.mode # => :row + # table1.equal?(table) # => true # Returned self + def by_row! + @mode = :row + + self + end + + # :call-seq: + # table.headers -> array_of_headers + # + # Returns a new \Array containing the \String headers for the table. + # + # If the table is not empty, returns the headers from the first row: + # rows = [ + # CSV::Row.new(['Foo', 'Bar'], []), + # CSV::Row.new(['FOO', 'BAR'], []), + # CSV::Row.new(['foo', 'bar'], []), + # ] + # table = CSV::Table.new(rows) + # table.headers # => ["Foo", "Bar"] + # table.delete(0) + # table.headers # => ["FOO", "BAR"] + # table.delete(0) + # table.headers # => ["foo", "bar"] + # + # If the table is empty, returns a copy of the headers in the table itself: + # table.delete(0) + # table.headers # => ["Foo", "Bar"] + def headers + if @table.empty? + @headers.dup + else + @table.first.headers + end + end + + # :call-seq: + # table[n] -> row or column_data + # table[range] -> array_of_rows or array_of_column_data + # table[header] -> array_of_column_data + # + # Returns data from the table; does not modify the table. + # + # --- + # + # Fetch a \Row by Its \Integer Index:: + # - Form: <tt>table[n]</tt>, +n+ an integer. + # - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>. + # - Return value: _nth_ row of the table, if that row exists; + # otherwise +nil+. + # + # Returns the _nth_ row of the table if that row exists: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_row! # => #<CSV::Table mode:row row_count:4> + # table[1] # => #<CSV::Row "Name":"bar" "Value":"1"> + # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4> + # table[1] # => #<CSV::Row "Name":"bar" "Value":"1"> + # + # Counts backward from the last row if +n+ is negative: + # table[-1] # => #<CSV::Row "Name":"baz" "Value":"2"> + # + # Returns +nil+ if +n+ is too large or too small: + # table[4] # => nil + # table[-4] # => nil + # + # Raises an exception if the access mode is <tt>:row</tt> + # and +n+ is not an \Integer: + # table.by_row! # => #<CSV::Table mode:row row_count:4> + # # Raises TypeError (no implicit conversion of String into Integer): + # table['Name'] + # + # --- + # + # Fetch a Column by Its \Integer Index:: + # - Form: <tt>table[n]</tt>, +n+ an \Integer. + # - Access mode: <tt>:col</tt>. + # - Return value: _nth_ column of the table, if that column exists; + # otherwise an \Array of +nil+ fields of length <tt>self.size</tt>. + # + # Returns the _nth_ column of the table if that column exists: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_col! # => #<CSV::Table mode:col row_count:4> + # table[1] # => ["0", "1", "2"] + # + # Counts backward from the last column if +n+ is negative: + # table[-2] # => ["foo", "bar", "baz"] + # + # Returns an \Array of +nil+ fields if +n+ is too large or too small: + # table[4] # => [nil, nil, nil] + # table[-4] # => [nil, nil, nil] + # + # --- + # + # Fetch Rows by \Range:: + # - Form: <tt>table[range]</tt>, +range+ a \Range object. + # - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>. + # - Return value: rows from the table, beginning at row <tt>range.start</tt>, + # if those rows exists. + # + # Returns rows from the table, beginning at row <tt>range.first</tt>, + # if those rows exist: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_row! # => #<CSV::Table mode:row row_count:4> + # rows = table[1..2] # => #<CSV::Row "Name":"bar" "Value":"1"> + # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] + # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4> + # rows = table[1..2] # => #<CSV::Row "Name":"bar" "Value":"1"> + # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] + # + # If there are too few rows, returns all from <tt>range.start</tt> to the end: + # rows = table[1..50] # => #<CSV::Row "Name":"bar" "Value":"1"> + # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] + # + # Special case: if <tt>range.start == table.size</tt>, returns an empty \Array: + # table[table.size..50] # => [] + # + # If <tt>range.end</tt> is negative, calculates the ending index from the end: + # rows = table[0..-1] + # rows # => [#<CSV::Row "Name":"foo" "Value":"0">, #<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] + # + # If <tt>range.start</tt> is negative, calculates the starting index from the end: + # rows = table[-1..2] + # rows # => [#<CSV::Row "Name":"baz" "Value":"2">] + # + # If <tt>range.start</tt> is larger than <tt>table.size</tt>, returns +nil+: + # table[4..4] # => nil + # + # --- + # + # Fetch Columns by \Range:: + # - Form: <tt>table[range]</tt>, +range+ a \Range object. + # - Access mode: <tt>:col</tt>. + # - Return value: column data from the table, beginning at column <tt>range.start</tt>, + # if those columns exist. + # + # Returns column values from the table, if the column exists; + # the values are arranged by row: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_col! + # table[0..1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] + # + # Special case: if <tt>range.start == headers.size</tt>, + # returns an \Array (size: <tt>table.size</tt>) of empty \Arrays: + # table[table.headers.size..50] # => [[], [], []] + # + # If <tt>range.end</tt> is negative, calculates the ending index from the end: + # table[0..-1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] + # + # If <tt>range.start</tt> is negative, calculates the starting index from the end: + # table[-2..2] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] + # + # If <tt>range.start</tt> is larger than <tt>table.size</tt>, + # returns an \Array of +nil+ values: + # table[4..4] # => [nil, nil, nil] + # + # --- + # + # Fetch a Column by Its \String Header:: + # - Form: <tt>table[header]</tt>, +header+ a \String header. + # - Access mode: <tt>:col</tt> or <tt>:col_or_row</tt> + # - Return value: column data from the table, if that +header+ exists. + # + # Returns column values from the table, if the column exists: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_col! # => #<CSV::Table mode:col row_count:4> + # table['Name'] # => ["foo", "bar", "baz"] + # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4> + # col = table['Name'] + # col # => ["foo", "bar", "baz"] + # + # Modifying the returned column values does not modify the table: + # col[0] = 'bat' + # col # => ["bat", "bar", "baz"] + # table['Name'] # => ["foo", "bar", "baz"] + # + # Returns an \Array of +nil+ values if there is no such column: + # table['Nosuch'] # => [nil, nil, nil] + def [](index_or_header) + if @mode == :row or # by index + (@mode == :col_or_row and (index_or_header.is_a?(Integer) or index_or_header.is_a?(Range))) + @table[index_or_header] + else # by header + @table.map { |row| row[index_or_header] } + end + end + + # :call-seq: + # table[n] = row -> row + # table[n] = field_or_array_of_fields -> field_or_array_of_fields + # table[header] = field_or_array_of_fields -> field_or_array_of_fields + # + # Puts data onto the table. + # + # --- + # + # Set a \Row by Its \Integer Index:: + # - Form: <tt>table[n] = row</tt>, +n+ an \Integer, + # +row+ a \CSV::Row instance or an \Array of fields. + # - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>. + # - Return value: +row+. + # + # If the row exists, it is replaced: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # new_row = CSV::Row.new(['Name', 'Value'], ['bat', 3]) + # table.by_row! # => #<CSV::Table mode:row row_count:4> + # return_value = table[0] = new_row + # return_value.equal?(new_row) # => true # Returned the row + # table[0].to_h # => {"Name"=>"bat", "Value"=>3} + # + # With access mode <tt>:col_or_row</tt>: + # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4> + # table[0] = CSV::Row.new(['Name', 'Value'], ['bam', 4]) + # table[0].to_h # => {"Name"=>"bam", "Value"=>4} + # + # With an \Array instead of a \CSV::Row, inherits headers from the table: + # array = ['bad', 5] + # return_value = table[0] = array + # return_value.equal?(array) # => true # Returned the array + # table[0].to_h # => {"Name"=>"bad", "Value"=>5} + # + # If the row does not exist, extends the table by adding rows: + # assigns rows with +nil+ as needed: + # table.size # => 3 + # table[5] = ['bag', 6] + # table.size # => 6 + # table[3] # => nil + # table[4]# => nil + # table[5].to_h # => {"Name"=>"bag", "Value"=>6} + # + # Note that the +nil+ rows are actually +nil+, not a row of +nil+ fields. + # + # --- + # + # Set a Column by Its \Integer Index:: + # - Form: <tt>table[n] = array_of_fields</tt>, +n+ an \Integer, + # +array_of_fields+ an \Array of \String fields. + # - Access mode: <tt>:col</tt>. + # - Return value: +array_of_fields+. + # + # If the column exists, it is replaced: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # new_col = [3, 4, 5] + # table.by_col! # => #<CSV::Table mode:col row_count:4> + # return_value = table[1] = new_col + # return_value.equal?(new_col) # => true # Returned the column + # table[1] # => [3, 4, 5] + # # The rows, as revised: + # table.by_row! # => #<CSV::Table mode:row row_count:4> + # table[0].to_h # => {"Name"=>"foo", "Value"=>3} + # table[1].to_h # => {"Name"=>"bar", "Value"=>4} + # table[2].to_h # => {"Name"=>"baz", "Value"=>5} + # table.by_col! # => #<CSV::Table mode:col row_count:4> + # + # If there are too few values, fills with +nil+ values: + # table[1] = [0] + # table[1] # => [0, nil, nil] + # + # If there are too many values, ignores the extra values: + # table[1] = [0, 1, 2, 3, 4] + # table[1] # => [0, 1, 2] + # + # If a single value is given, replaces all fields in the column with that value: + # table[1] = 'bat' + # table[1] # => ["bat", "bat", "bat"] + # + # --- + # + # Set a Column by Its \String Header:: + # - Form: <tt>table[header] = field_or_array_of_fields</tt>, + # +header+ a \String header, +field_or_array_of_fields+ a field value + # or an \Array of \String fields. + # - Access mode: <tt>:col</tt> or <tt>:col_or_row</tt>. + # - Return value: +field_or_array_of_fields+. + # + # If the column exists, it is replaced: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # new_col = [3, 4, 5] + # table.by_col! # => #<CSV::Table mode:col row_count:4> + # return_value = table['Value'] = new_col + # return_value.equal?(new_col) # => true # Returned the column + # table['Value'] # => [3, 4, 5] + # # The rows, as revised: + # table.by_row! # => #<CSV::Table mode:row row_count:4> + # table[0].to_h # => {"Name"=>"foo", "Value"=>3} + # table[1].to_h # => {"Name"=>"bar", "Value"=>4} + # table[2].to_h # => {"Name"=>"baz", "Value"=>5} + # table.by_col! # => #<CSV::Table mode:col row_count:4> + # + # If there are too few values, fills with +nil+ values: + # table['Value'] = [0] + # table['Value'] # => [0, nil, nil] + # + # If there are too many values, ignores the extra values: + # table['Value'] = [0, 1, 2, 3, 4] + # table['Value'] # => [0, 1, 2] + # + # If the column does not exist, extends the table by adding columns: + # table['Note'] = ['x', 'y', 'z'] + # table['Note'] # => ["x", "y", "z"] + # # The rows, as revised: + # table.by_row! + # table[0].to_h # => {"Name"=>"foo", "Value"=>0, "Note"=>"x"} + # table[1].to_h # => {"Name"=>"bar", "Value"=>1, "Note"=>"y"} + # table[2].to_h # => {"Name"=>"baz", "Value"=>2, "Note"=>"z"} + # table.by_col! + # + # If a single value is given, replaces all fields in the column with that value: + # table['Value'] = 'bat' + # table['Value'] # => ["bat", "bat", "bat"] + def []=(index_or_header, value) + if @mode == :row or # by index + (@mode == :col_or_row and index_or_header.is_a? Integer) + if value.is_a? Array + @table[index_or_header] = Row.new(headers, value) + else + @table[index_or_header] = value + end + else # set column + unless index_or_header.is_a? Integer + index = @headers.index(index_or_header) || @headers.size + @headers[index] = index_or_header + end + if value.is_a? Array # multiple values + @table.each_with_index do |row, i| + if row.header_row? + row[index_or_header] = index_or_header + else + row[index_or_header] = value[i] + end + end + else # repeated value + @table.each do |row| + if row.header_row? + row[index_or_header] = index_or_header + else + row[index_or_header] = value + end + end + end + end + end + + # :call-seq: + # table.values_at(*indexes) -> array_of_rows + # table.values_at(*headers) -> array_of_columns_data + # + # If the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>, + # and each argument is either an \Integer or a \Range, + # returns rows. + # Otherwise, returns columns data. + # + # In either case, the returned values are in the order + # specified by the arguments. Arguments may be repeated. + # + # --- + # + # Returns rows as an \Array of \CSV::Row objects. + # + # No argument: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.values_at # => [] + # + # One index: + # values = table.values_at(0) + # values # => [#<CSV::Row "Name":"foo" "Value":"0">] + # + # Two indexes: + # values = table.values_at(2, 0) + # values # => [#<CSV::Row "Name":"baz" "Value":"2">, #<CSV::Row "Name":"foo" "Value":"0">] + # + # One \Range: + # values = table.values_at(1..2) + # values # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] + # + # \Ranges and indexes: + # values = table.values_at(0..1, 1..2, 0, 2) + # pp values + # Output: + # [#<CSV::Row "Name":"foo" "Value":"0">, + # #<CSV::Row "Name":"bar" "Value":"1">, + # #<CSV::Row "Name":"bar" "Value":"1">, + # #<CSV::Row "Name":"baz" "Value":"2">, + # #<CSV::Row "Name":"foo" "Value":"0">, + # #<CSV::Row "Name":"baz" "Value":"2">] + # + # --- + # + # Returns columns data as row Arrays, + # each consisting of the specified columns data for that row: + # values = table.values_at('Name') + # values # => [["foo"], ["bar"], ["baz"]] + # values = table.values_at('Value', 'Name') + # values # => [["0", "foo"], ["1", "bar"], ["2", "baz"]] + def values_at(*indices_or_headers) + if @mode == :row or # by indices + ( @mode == :col_or_row and indices_or_headers.all? do |index| + index.is_a?(Integer) or + ( index.is_a?(Range) and + index.first.is_a?(Integer) and + index.last.is_a?(Integer) ) + end ) + @table.values_at(*indices_or_headers) + else # by headers + @table.map { |row| row.values_at(*indices_or_headers) } + end + end + + # :call-seq: + # table << row_or_array -> self + # + # If +row_or_array+ is a \CSV::Row object, + # it is appended to the table: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table << CSV::Row.new(table.headers, ['bat', 3]) + # table[3] # => #<CSV::Row "Name":"bat" "Value":3> + # + # If +row_or_array+ is an \Array, it is used to create a new + # \CSV::Row object which is then appended to the table: + # table << ['bam', 4] + # table[4] # => #<CSV::Row "Name":"bam" "Value":4> + def <<(row_or_array) + if row_or_array.is_a? Array # append Array + @table << Row.new(headers, row_or_array) + else # append Row + @table << row_or_array + end + + self # for chaining + end + + # + # :call-seq: + # table.push(*rows_or_arrays) -> self + # + # A shortcut for appending multiple rows. Equivalent to: + # rows.each {|row| self << row } + # + # Each argument may be either a \CSV::Row object or an \Array: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # rows = [ + # CSV::Row.new(table.headers, ['bat', 3]), + # ['bam', 4] + # ] + # table.push(*rows) + # table[3..4] # => [#<CSV::Row "Name":"bat" "Value":3>, #<CSV::Row "Name":"bam" "Value":4>] + def push(*rows) + rows.each { |row| self << row } + + self # for chaining + end + + # :call-seq: + # table.delete(*indexes) -> deleted_values + # table.delete(*headers) -> deleted_values + # + # If the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>, + # and each argument is either an \Integer or a \Range, + # returns deleted rows. + # Otherwise, returns deleted columns data. + # + # In either case, the returned values are in the order + # specified by the arguments. Arguments may be repeated. + # + # --- + # + # Returns rows as an \Array of \CSV::Row objects. + # + # One index: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # deleted_values = table.delete(0) + # deleted_values # => [#<CSV::Row "Name":"foo" "Value":"0">] + # + # Two indexes: + # table = CSV.parse(source, headers: true) + # deleted_values = table.delete(2, 0) + # deleted_values # => [#<CSV::Row "Name":"baz" "Value":"2">, #<CSV::Row "Name":"foo" "Value":"0">] + # + # --- + # + # Returns columns data as column Arrays. + # + # One header: + # table = CSV.parse(source, headers: true) + # deleted_values = table.delete('Name') + # deleted_values # => ["foo", "bar", "baz"] + # + # Two headers: + # table = CSV.parse(source, headers: true) + # deleted_values = table.delete('Value', 'Name') + # deleted_values # => [["0", "1", "2"], ["foo", "bar", "baz"]] + def delete(*indexes_or_headers) + if indexes_or_headers.empty? + raise ArgumentError, "wrong number of arguments (given 0, expected 1+)" + end + deleted_values = indexes_or_headers.map do |index_or_header| + if @mode == :row or # by index + (@mode == :col_or_row and index_or_header.is_a? Integer) + @table.delete_at(index_or_header) + else # by header + if index_or_header.is_a? Integer + @headers.delete_at(index_or_header) + else + @headers.delete(index_or_header) + end + @table.map { |row| row.delete(index_or_header).last } + end + end + if indexes_or_headers.size == 1 + deleted_values[0] + else + deleted_values + end + end + + # :call-seq: + # table.delete_if {|row_or_column| ... } -> self + # + # Removes rows or columns for which the block returns a truthy value; + # returns +self+. + # + # Removes rows when the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>; + # calls the block with each \CSV::Row object: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_row! # => #<CSV::Table mode:row row_count:4> + # table.size # => 3 + # table.delete_if {|row| row['Name'].start_with?('b') } + # table.size # => 1 + # + # Removes columns when the access mode is <tt>:col</tt>; + # calls the block with each column as a 2-element array + # containing the header and an \Array of column fields: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_col! # => #<CSV::Table mode:col row_count:4> + # table.headers.size # => 2 + # table.delete_if {|column_data| column_data[1].include?('2') } + # table.headers.size # => 1 + # + # Returns a new \Enumerator if no block is given: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.delete_if # => #<Enumerator: #<CSV::Table mode:col_or_row row_count:4>:delete_if> + def delete_if(&block) + return enum_for(__method__) { @mode == :row or @mode == :col_or_row ? size : headers.size } unless block_given? + + if @mode == :row or @mode == :col_or_row # by index + @table.delete_if(&block) + else # by header + deleted = [] + headers.each do |header| + deleted << delete(header) if yield([header, self[header]]) + end + end + + self # for chaining + end + + include Enumerable + + # :call-seq: + # table.each {|row_or_column| ... ) -> self + # + # Calls the block with each row or column; returns +self+. + # + # When the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>, + # calls the block with each \CSV::Row object: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_row! # => #<CSV::Table mode:row row_count:4> + # table.each {|row| p row } + # Output: + # #<CSV::Row "Name":"foo" "Value":"0"> + # #<CSV::Row "Name":"bar" "Value":"1"> + # #<CSV::Row "Name":"baz" "Value":"2"> + # + # When the access mode is <tt>:col</tt>, + # calls the block with each column as a 2-element array + # containing the header and an \Array of column fields: + # table.by_col! # => #<CSV::Table mode:col row_count:4> + # table.each {|column_data| p column_data } + # Output: + # ["Name", ["foo", "bar", "baz"]] + # ["Value", ["0", "1", "2"]] + # + # Returns a new \Enumerator if no block is given: + # table.each # => #<Enumerator: #<CSV::Table mode:col row_count:4>:each> + def each(&block) + return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given? + + if @mode == :col + headers.each.with_index do |header, i| + yield([header, @table.map {|row| row[header, i]}]) + end + else + @table.each(&block) + end + + self # for chaining + end + + # :call-seq: + # table == other_table -> true or false + # + # Returns +true+ if all each row of +self+ <tt>==</tt> + # the corresponding row of +other_table+, otherwise, +false+. + # + # The access mode does no affect the result. + # + # Equal tables: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # other_table = CSV.parse(source, headers: true) + # table == other_table # => true + # + # Different row count: + # other_table.delete(2) + # table == other_table # => false + # + # Different last row: + # other_table << ['bat', 3] + # table == other_table # => false + def ==(other) + return @table == other.table if other.is_a? CSV::Table + @table == other + end + + # :call-seq: + # table.to_a -> array_of_arrays + # + # Returns the table as an \Array of \Arrays; + # the headers are in the first row: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.to_a # => [["Name", "Value"], ["foo", "0"], ["bar", "1"], ["baz", "2"]] + def to_a + array = [headers] + @table.each do |row| + array.push(row.fields) unless row.header_row? + end + + array + end + + # :call-seq: + # table.to_csv(**options) -> csv_string + # + # Returns the table as \CSV string. + # See {Options for Generating}[../CSV.html#class-CSV-label-Options+for+Generating]. + # + # Defaults option +write_headers+ to +true+: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.to_csv # => "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # + # Omits the headers if option +write_headers+ is given as +false+ + # (see {Option +write_headers+}[../CSV.html#class-CSV-label-Option+write_headers]): + # table.to_csv(write_headers: false) # => "foo,0\nbar,1\nbaz,2\n" + # + # Limit rows if option +limit+ is given like +2+: + # table.to_csv(limit: 2) # => "Name,Value\nfoo,0\nbar,1\n" + def to_csv(write_headers: true, limit: nil, **options) + array = write_headers ? [headers.to_csv(**options)] : [] + limit ||= @table.size + limit = @table.size + 1 + limit if limit < 0 + limit = 0 if limit < 0 + @table.first(limit).each do |row| + array.push(row.fields.to_csv(**options)) unless row.header_row? + end + + array.join("") + end + alias_method :to_s, :to_csv + + # + # Extracts the nested value specified by the sequence of +index+ or +header+ objects by calling dig at each step, + # returning nil if any intermediate step is nil. + # + def dig(index_or_header, *index_or_headers) + value = self[index_or_header] + if value.nil? + nil + elsif index_or_headers.empty? + value + else + unless value.respond_to?(:dig) + raise TypeError, "#{value.class} does not have \#dig method" + end + value.dig(*index_or_headers) + end + end + + # :call-seq: + # table.inspect => string + # + # Returns a <tt>US-ASCII</tt>-encoded \String showing table: + # - Class: <tt>CSV::Table</tt>. + # - Access mode: <tt>:row</tt>, <tt>:col</tt>, or <tt>:col_or_row</tt>. + # - Size: Row count, including the header row. + # + # Example: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.inspect # => "#<CSV::Table mode:col_or_row row_count:4>\nName,Value\nfoo,0\nbar,1\nbaz,2\n" + # + def inspect + inspected = +"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>" + summary = to_csv(limit: 5) + inspected << "\n" << summary if summary.encoding.ascii_compatible? + inspected + end + end +end diff --git a/lib/csv/version.rb b/lib/csv/version.rb new file mode 100644 index 0000000000..ca16064a89 --- /dev/null +++ b/lib/csv/version.rb @@ -0,0 +1,6 @@ +# frozen_string_literal: true + +class CSV + # The version of the installed library. + VERSION = "3.2.5" +end diff --git a/lib/csv/writer.rb b/lib/csv/writer.rb new file mode 100644 index 0000000000..030a295bc9 --- /dev/null +++ b/lib/csv/writer.rb @@ -0,0 +1,210 @@ +# frozen_string_literal: true + +require_relative "input_record_separator" +require_relative "row" + +class CSV + # Note: Don't use this class directly. This is an internal class. + class Writer + # + # A CSV::Writer receives an output, prepares the header, format and output. + # It allows us to write new rows in the object and rewind it. + # + attr_reader :lineno + attr_reader :headers + + def initialize(output, options) + @output = output + @options = options + @lineno = 0 + @fields_converter = nil + prepare + if @options[:write_headers] and @headers + self << @headers + end + @fields_converter = @options[:fields_converter] + end + + # + # Adds a new row + # + def <<(row) + case row + when Row + row = row.fields + when Hash + row = @headers.collect {|header| row[header]} + end + + @headers ||= row if @use_headers + @lineno += 1 + + if @fields_converter + quoted_fields = [false] * row.size + row = @fields_converter.convert(row, nil, lineno, quoted_fields) + end + + i = -1 + converted_row = row.collect do |field| + i += 1 + quote(field, i) + end + line = converted_row.join(@column_separator) + @row_separator + if @output_encoding + line = line.encode(@output_encoding) + end + @output << line + + self + end + + # + # Winds back to the beginning + # + def rewind + @lineno = 0 + @headers = nil if @options[:headers].nil? + end + + private + def prepare + @encoding = @options[:encoding] + + prepare_header + prepare_format + prepare_output + end + + def prepare_header + headers = @options[:headers] + case headers + when Array + @headers = headers + @use_headers = true + when String + @headers = CSV.parse_line(headers, + col_sep: @options[:column_separator], + row_sep: @options[:row_separator], + quote_char: @options[:quote_character]) + @use_headers = true + when true + @headers = nil + @use_headers = true + else + @headers = nil + @use_headers = false + end + return unless @headers + + converter = @options[:header_fields_converter] + @headers = converter.convert(@headers, nil, 0, []) + @headers.each do |header| + header.freeze if header.is_a?(String) + end + end + + def prepare_force_quotes_fields(force_quotes) + @force_quotes_fields = {} + force_quotes.each do |name_or_index| + case name_or_index + when Integer + index = name_or_index + @force_quotes_fields[index] = true + when String, Symbol + name = name_or_index.to_s + if @headers.nil? + message = ":headers is required when you use field name " + + "in :force_quotes: " + + "#{name_or_index.inspect}: #{force_quotes.inspect}" + raise ArgumentError, message + end + index = @headers.index(name) + next if index.nil? + @force_quotes_fields[index] = true + else + message = ":force_quotes element must be " + + "field index or field name: " + + "#{name_or_index.inspect}: #{force_quotes.inspect}" + raise ArgumentError, message + end + end + end + + def prepare_format + @column_separator = @options[:column_separator].to_s.encode(@encoding) + row_separator = @options[:row_separator] + if row_separator == :auto + @row_separator = InputRecordSeparator.value.encode(@encoding) + else + @row_separator = row_separator.to_s.encode(@encoding) + end + @quote_character = @options[:quote_character] + force_quotes = @options[:force_quotes] + if force_quotes.is_a?(Array) + prepare_force_quotes_fields(force_quotes) + @force_quotes = false + elsif force_quotes + @force_quotes_fields = nil + @force_quotes = true + else + @force_quotes_fields = nil + @force_quotes = false + end + unless @force_quotes + @quotable_pattern = + Regexp.new("[\r\n".encode(@encoding) + + Regexp.escape(@column_separator) + + Regexp.escape(@quote_character.encode(@encoding)) + + "]".encode(@encoding)) + end + @quote_empty = @options.fetch(:quote_empty, true) + end + + def prepare_output + @output_encoding = nil + return unless @output.is_a?(StringIO) + + output_encoding = @output.internal_encoding || @output.external_encoding + if @encoding != output_encoding + if @options[:force_encoding] + @output_encoding = output_encoding + else + compatible_encoding = Encoding.compatible?(@encoding, output_encoding) + if compatible_encoding + @output.set_encoding(compatible_encoding) + @output.seek(0, IO::SEEK_END) + end + end + end + end + + def quote_field(field) + field = String(field) + encoded_quote_character = @quote_character.encode(field.encoding) + encoded_quote_character + + field.gsub(encoded_quote_character, + encoded_quote_character * 2) + + encoded_quote_character + end + + def quote(field, i) + if @force_quotes + quote_field(field) + elsif @force_quotes_fields and @force_quotes_fields[i] + quote_field(field) + else + if field.nil? # represent +nil+ fields as empty unquoted fields + "" + else + field = String(field) # Stringify fields + # represent empty fields as empty quoted fields + if (@quote_empty and field.empty?) or (field.valid_encoding? and @quotable_pattern.match?(field)) + quote_field(field) + else + field # unquoted field + end + end + end + end + end +end |
