summaryrefslogtreecommitdiff
path: root/lib/csv
diff options
context:
space:
mode:
Diffstat (limited to 'lib/csv')
-rw-r--r--lib/csv/core_ext/array.rb9
-rw-r--r--lib/csv/core_ext/string.rb9
-rw-r--r--lib/csv/csv.gemspec64
-rw-r--r--lib/csv/delete_suffix.rb18
-rw-r--r--lib/csv/fields_converter.rb89
-rw-r--r--lib/csv/input_record_separator.rb18
-rw-r--r--lib/csv/match_p.rb20
-rw-r--r--lib/csv/parser.rb1289
-rw-r--r--lib/csv/row.rb757
-rw-r--r--lib/csv/table.rb1056
-rw-r--r--lib/csv/version.rb6
-rw-r--r--lib/csv/writer.rb210
12 files changed, 3545 insertions, 0 deletions
diff --git a/lib/csv/core_ext/array.rb b/lib/csv/core_ext/array.rb
new file mode 100644
index 0000000000..8beb06b082
--- /dev/null
+++ b/lib/csv/core_ext/array.rb
@@ -0,0 +1,9 @@
+class Array # :nodoc:
+ # Equivalent to CSV.generate_line(self, **options)
+ #
+ # ["CSV", "data"].to_csv
+ # #=> "CSV,data\n"
+ def to_csv(**options) # every keyword option is forwarded unchanged to CSV.generate_line
+ CSV.generate_line(self, **options)
+ end
+end
diff --git a/lib/csv/core_ext/string.rb b/lib/csv/core_ext/string.rb
new file mode 100644
index 0000000000..9b1d31c2a4
--- /dev/null
+++ b/lib/csv/core_ext/string.rb
@@ -0,0 +1,9 @@
+class String # :nodoc:
+ # Equivalent to CSV.parse_line(self, **options)
+ #
+ # "CSV,data".parse_csv
+ # #=> ["CSV", "data"]
+ def parse_csv(**options) # every keyword option is forwarded unchanged to CSV.parse_line
+ CSV.parse_line(self, **options)
+ end
+end
diff --git a/lib/csv/csv.gemspec b/lib/csv/csv.gemspec
new file mode 100644
index 0000000000..11c5b0f2a6
--- /dev/null
+++ b/lib/csv/csv.gemspec
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+begin
+ require_relative "lib/csv/version" # first try lib/csv/version.rb below this file's directory
+rescue LoadError
+ # Fallback for the Ruby core repository: load version.rb from this file's own directory
+ require_relative "version"
+end
+
+Gem::Specification.new do |spec|
+ spec.name = "csv"
+ spec.version = CSV::VERSION
+ spec.authors = ["James Edward Gray II", "Kouhei Sutou"]
+ spec.email = [nil, "kou@cozmixng.org"] # nil: no address listed in the first slot (presumably positional with authors)
+
+ spec.summary = "CSV Reading and Writing"
+ spec.description = "The CSV library provides a complete interface to CSV files and data. It offers tools to enable you to read and write to and from Strings or IO objects, as needed."
+ spec.homepage = "https://github.com/ruby/csv"
+ spec.licenses = ["Ruby", "BSD-2-Clause"]
+
+ lib_path = "lib"
+ spec.require_paths = [lib_path]
+ files = [] # accumulates every path packaged into the gem
+ lib_dir = File.join(__dir__, lib_path)
+ if File.exist?(lib_dir) # nothing is collected when there is no lib/ next to this gemspec
+ Dir.chdir(lib_dir) do # glob inside lib/ so the matches are relative to it
+ Dir.glob("**/*.rb").each do |file|
+ files << "lib/#{file}" # re-add the prefix dropped by globbing inside lib/
+ end
+ end
+ end
+ doc_dir = File.join(__dir__, "doc")
+ if File.exist?(doc_dir) # same collection scheme for the .rdoc files under doc/
+ Dir.chdir(doc_dir) do
+ Dir.glob("**/*.rdoc").each do |rdoc_file|
+ files << "doc/#{rdoc_file}"
+ end
+ end
+ end
+ spec.files = files
+ spec.rdoc_options.concat(["--main", "README.md"]) # README.md is the rdoc main page
+ rdoc_files = [
+ "LICENSE.txt",
+ "NEWS.md",
+ "README.md",
+ ]
+ recipes_dir = File.join(doc_dir, "csv", "recipes")
+ if File.exist?(recipes_dir) # recipe documents are added as extra rdoc files only when present
+ Dir.chdir(recipes_dir) do
+ Dir.glob("**/*.rdoc").each do |recipe_file|
+ rdoc_files << "doc/csv/recipes/#{recipe_file}"
+ end
+ end
+ end
+ spec.extra_rdoc_files = rdoc_files
+
+ spec.required_ruby_version = ">= 2.5.0"
+
+ # spec.add_dependency "stringio", ">= 0.1.3"
+ spec.add_development_dependency "bundler"
+ spec.add_development_dependency "rake"
+ spec.add_development_dependency "benchmark_driver"
+ spec.add_development_dependency "test-unit", ">= 3.4.8"
+end
diff --git a/lib/csv/delete_suffix.rb b/lib/csv/delete_suffix.rb
new file mode 100644
index 0000000000..d457718997
--- /dev/null
+++ b/lib/csv/delete_suffix.rb
@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+
+# This provides String#delete_suffix? for Ruby 2.4.
+unless String.method_defined?(:delete_suffix)
+ class CSV
+ module DeleteSuffix
+ refine String do
+ def delete_suffix(suffix)
+ if end_with?(suffix)
+ self[0...-suffix.size]
+ else
+ self
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/csv/fields_converter.rb b/lib/csv/fields_converter.rb
new file mode 100644
index 0000000000..d15977d379
--- /dev/null
+++ b/lib/csv/fields_converter.rb
@@ -0,0 +1,89 @@
+# frozen_string_literal: true
+
+class CSV
+ # Note: Don't use this class directly. This is an internal class.
+ class FieldsConverter
+ include Enumerable
+ #
+ # A CSV::FieldsConverter is a data structure for storing the
+ # fields converter properties to be passed as a parameter
+ # when parsing a new file (e.g. CSV::Parser.new(@io, parser_options))
+ #
+
+ def initialize(options={}) # keys read: :nil_value, :empty_value, :accept_nil, :builtin_converters_name
+ @converters = []
+ @nil_value = options[:nil_value]
+ @empty_value = options[:empty_value]
+ @empty_value_is_empty_string = (@empty_value == "") # cached so convert can skip replacing "" with ""
+ @accept_nil = options[:accept_nil]
+ @builtin_converters_name = options[:builtin_converters_name]
+ @need_static_convert = need_static_convert? # computed once; the options never change afterwards
+ end
+
+ def add_converter(name=nil, &converter) # appends the block, or the builtin converter(s) registered under +name+
+ if name.nil? # custom converter
+ @converters << converter
+ else # named converter
+ combo = builtin_converters[name]
+ case combo
+ when Array # combo converter: a list of converter names, expanded recursively
+ combo.each do |sub_name|
+ add_converter(sub_name)
+ end
+ else # individual named converter; NOTE(review): an unknown name presumably yields nil, appended as-is - confirm
+ @converters << combo
+ end
+ end
+ end
+
+ def each(&block) # iterates the registered converters (backs Enumerable)
+ @converters.each(&block)
+ end
+
+ def empty?
+ @converters.empty?
+ end
+
+ def convert(fields, headers, lineno, quoted_fields) # returns +fields+ itself when nothing applies, otherwise a new Array
+ return fields unless need_convert?
+
+ fields.collect.with_index do |field, index|
+ if field.nil?
+ field = @nil_value
+ elsif field.is_a?(String) and field.empty?
+ field = @empty_value unless @empty_value_is_empty_string
+ end
+ @converters.each do |converter|
+ break if field.nil? and @accept_nil # with :accept_nil set, a nil field ends the pipeline here
+ if converter.arity == 1 # straight field converter
+ field = converter[field]
+ else # FieldInfo converter
+ if headers
+ header = headers[index]
+ else
+ header = nil
+ end
+ quoted = quoted_fields[index]
+ field = converter[field, FieldInfo.new(index, lineno, header, quoted)]
+ end
+ break unless field.is_a?(String) # short-circuit pipeline for speed
+ end
+ field # final state of each field, converted or original
+ end
+ end
+
+ private
+ def need_static_convert? # false only when nil stays nil and "" stays ""
+ not (@nil_value.nil? and @empty_value_is_empty_string)
+ end
+
+ def need_convert? # true when nil/empty substitution or at least one converter applies
+ @need_static_convert or
+ (not @converters.empty?)
+ end
+
+ def builtin_converters # looked up lazily from the CSV constant named by :builtin_converters_name
+ @builtin_converters ||= ::CSV.const_get(@builtin_converters_name)
+ end
+ end
+end
diff --git a/lib/csv/input_record_separator.rb b/lib/csv/input_record_separator.rb
new file mode 100644
index 0000000000..7a99343c0c
--- /dev/null
+++ b/lib/csv/input_record_separator.rb
@@ -0,0 +1,18 @@
+require "English"
+require "stringio"
+
+class CSV
+ module InputRecordSeparator
+ class << self
+ if RUBY_VERSION >= "3.0.0" # decided once at load time; NOTE: this is a String (lexicographic) comparison
+ def value # fixed "\n" on this branch; the global variable is not consulted
+ "\n"
+ end
+ else
+ def value # the $INPUT_RECORD_SEPARATOR global (name provided by the "English" library)
+ $INPUT_RECORD_SEPARATOR
+ end
+ end
+ end
+ end
+end
diff --git a/lib/csv/match_p.rb b/lib/csv/match_p.rb
new file mode 100644
index 0000000000..775559a3eb
--- /dev/null
+++ b/lib/csv/match_p.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+# This provides String#match? and Regexp#match? for Ruby 2.3.
+unless String.method_defined?(:match?)
+ class CSV
+ module MatchP
+ refine String do
+ def match?(pattern)
+ self =~ pattern
+ end
+ end
+
+ refine Regexp do
+ def match?(string)
+ self =~ string
+ end
+ end
+ end
+ end
+end
diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
new file mode 100644
index 0000000000..afb3131cd5
--- /dev/null
+++ b/lib/csv/parser.rb
@@ -0,0 +1,1289 @@
+# frozen_string_literal: true
+
+require "strscan"
+
+require_relative "input_record_separator"
+require_relative "row"
+require_relative "table"
+
+class CSV
+ # Note: Don't use this class directly. This is an internal class.
+ class Parser
+ #
+ # A CSV::Parser is m17n aware. The parser works in the Encoding of the IO
+ # or String object being read from or written to. Your data is never transcoded
+ # (unless you ask Ruby to transcode it for you) and will literally be parsed in
+ # the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
+ # Encoding of your data. This is accomplished by transcoding the parser itself
+ # into your Encoding.
+ #
+
+ # Raised when read data is not valid in its encoding; #parse reports it as MalformedCSVError.
+ class InvalidEncoding < StandardError
+ end
+
+ # Raised when an unexpected internal state is hit; #parse reports it as MalformedCSVError.
+ class UnexpectedError < StandardError
+ end
+
+ #
+ # CSV::Scanner scans CSV data held in a single String and returns its content.
+ # It also manages saved scan positions ("keeps") with its methods +keep_start+,
+ # +keep_end+, +keep_back+ and +keep_drop+.
+ #
+ # Uses StringScanner (the official strscan gem). Strscan provides lexical
+ # scanning operations on a String. We inherit from it and take advantage
+ # of its methods. For more information, please visit:
+ # https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html
+ #
+ class Scanner < StringScanner
+ alias_method :scan_all, :scan # same name as InputsScanner#scan_all; here it is plain StringScanner#scan
+
+ def initialize(*args)
+ super
+ @keeps = [] # stack of saved byte positions
+ end
+
+ def each_line(row_separator) # yields each remaining line; pos is moved past the line before it is yielded
+ position = pos
+ rest.each_line(row_separator) do |line|
+ position += line.bytesize
+ self.pos = position
+ yield(line)
+ end
+ end
+
+ def keep_start # remembers the current position
+ @keeps.push(pos)
+ end
+
+ def keep_end # pops the last saved position and returns the bytes from there up to pos
+ start = @keeps.pop
+ string.byteslice(start, pos - start)
+ end
+
+ def keep_back # pops the last saved position and rewinds to it
+ self.pos = @keeps.pop
+ end
+
+ def keep_drop # forgets the last saved position without moving
+ @keeps.pop
+ end
+ end
+
+ #
+ # CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
+ # It also controls the life cycle of the object with its methods +keep_start+,
+ # +keep_end+, +keep_back+, +keep_drop+.
+ #
+ # CSV::InputsScanner.scan() tries to match with pattern at the current position.
+ # If there's a match, the scanner advances the "scan pointer" and returns the matched string.
+ # Otherwise, the scanner returns nil.
+ #
+ # CSV::InputsScanner.rest() returns the "rest" of the string (i.e. everything after the scan pointer).
+ # If there is no more data (eos? = true), it returns "".
+ #
+ class InputsScanner
+ def initialize(inputs, encoding, row_separator, chunk_size: 8192) # chunk_size is the limit handed to input.gets in read_chunk
+ @inputs = inputs.dup # private copy: read_chunk shifts it and keep_back may unshift onto it
+ @encoding = encoding
+ @row_separator = row_separator
+ @chunk_size = chunk_size
+ @last_scanner = @inputs.empty? # true once no further data can be loaded
+ @keeps = [] # stack of [scanner, start position, buffer] entries
+ read_chunk # loads the first chunk so @scanner is set
+ end
+
+ def each_line(row_separator) # yields complete lines, joining lines that are split across chunks
+ return enum_for(__method__, row_separator) unless block_given?
+ buffer = nil # incomplete line carried over until more data arrives
+ input = @scanner.rest
+ position = @scanner.pos
+ offset = 0
+ n_row_separator_chars = row_separator.size
+ # trace(__method__, :start, line, input)
+ while true
+ input.each_line(row_separator) do |line|
+ @scanner.pos += line.bytesize
+ if buffer
+ if n_row_separator_chars == 2 and
+ buffer.end_with?(row_separator[0]) and
+ line.start_with?(row_separator[1])
+ buffer << line[0] # a two-character separator was split between buffer and this line: complete it
+ line = line[1..-1]
+ position += buffer.bytesize + offset
+ @scanner.pos = position
+ offset = 0
+ yield(buffer)
+ buffer = nil
+ next if line.empty?
+ else
+ buffer << line # the carried-over text continues with this line
+ line = buffer
+ buffer = nil
+ end
+ end
+ if line.end_with?(row_separator)
+ position += line.bytesize + offset
+ @scanner.pos = position
+ offset = 0
+ yield(line)
+ else
+ buffer = line # no separator yet: hold the text back
+ end
+ end
+ break unless read_chunk
+ input = @scanner.rest
+ position = @scanner.pos
+ offset = -buffer.bytesize if buffer # buffered bytes are not part of the new scanner, so they are subtracted from its position
+ end
+ yield(buffer) if buffer # trailing data that has no row separator
+ end
+
+ def scan(pattern) # returns the match at the current position or nil
+ # trace(__method__, pattern, :start)
+ value = @scanner.scan(pattern)
+ # trace(__method__, pattern, :done, :last, value) if @last_scanner
+ return value if @last_scanner
+
+ read_chunk if value and @scanner.eos? # chunk used up by this match: load the next one
+ # trace(__method__, pattern, :done, value)
+ value
+ end
+
+ def scan_all(pattern) # like scan, but keeps matching across chunk boundaries and returns the joined text
+ # trace(__method__, pattern, :start)
+ value = @scanner.scan(pattern)
+ # trace(__method__, pattern, :done, :last, value) if @last_scanner
+ return value if @last_scanner
+
+ return nil if value.nil?
+ while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern))
+ # trace(__method__, pattern, :sub, sub_value)
+ value << sub_value
+ end
+ # trace(__method__, pattern, :done, value)
+ value
+ end
+
+ def eos? # end of the current chunk's scanner
+ @scanner.eos?
+ end
+
+ def keep_start # opens a new keep at the current position
+ # trace(__method__, :start)
+ adjust_last_keep # save what the enclosing keep has covered so far
+ @keeps.push([@scanner, @scanner.pos, nil]) # [scanner, start position, buffer]
+ # trace(__method__, :done)
+ end
+
+ def keep_end # closes the innermost keep and returns the text it covered
+ # trace(__method__, :start)
+ scanner, start, buffer = @keeps.pop
+ if scanner == @scanner
+ keep = @scanner.string.byteslice(start, @scanner.pos - start)
+ else
+ keep = @scanner.string.byteslice(0, @scanner.pos) # scanner was replaced since keep_start: take from its beginning
+ end
+ if buffer
+ buffer << keep # buffer holds the part saved earlier by adjust_last_keep
+ keep = buffer
+ end
+ # trace(__method__, :done, keep)
+ keep
+ end
+
+ def keep_back # closes the innermost keep and rewinds scanning to where it started
+ # trace(__method__, :start)
+ scanner, start, buffer = @keeps.pop
+ if buffer
+ # trace(__method__, :rescan, start, buffer)
+ string = @scanner.string
+ if scanner == @scanner
+ keep = string.byteslice(start, string.bytesize - start)
+ else
+ keep = string
+ end
+ if keep and not keep.empty?
+ @inputs.unshift(StringIO.new(keep)) # queue the current chunk's data to be read again after the buffer
+ @last_scanner = false
+ end
+ @scanner = StringScanner.new(buffer) # rescan the buffered text first
+ else
+ if @scanner != scanner
+ message = "scanners are different but no buffer: "
+ message += "#{@scanner.inspect}(#{@scanner.object_id}): "
+ message += "#{scanner.inspect}(#{scanner.object_id})"
+ raise UnexpectedError, message
+ end
+ # trace(__method__, :repos, start, buffer)
+ @scanner.pos = start # same scanner and nothing buffered: plain rewind
+ end
+ read_chunk if @scanner.eos?
+ end
+
+ def keep_drop # closes the innermost keep without rewinding
+ _, _, buffer = @keeps.pop
+ # trace(__method__, :done, :empty) unless buffer
+ return unless buffer
+
+ last_keep = @keeps.last
+ # trace(__method__, :done, :no_last_keep) unless last_keep
+ return unless last_keep
+
+ if last_keep[2] # hand the dropped keep's buffered text over to the enclosing keep
+ last_keep[2] << buffer
+ else
+ last_keep[2] = buffer
+ end
+ # trace(__method__, :done)
+ end
+
+ def rest # unscanned remainder of the current chunk only
+ @scanner.rest
+ end
+
+ def check(pattern) # delegates to the current chunk's scanner; no further chunk is loaded here
+ @scanner.check(pattern)
+ end
+
+ private
+ def trace(*args) # debugging aid; every call site in this class is commented out
+ pp([*args, @scanner, @scanner&.string, @scanner&.pos, @keeps])
+ end
+
+ def adjust_last_keep # copies what the innermost keep covers in the current scanner into that keep's buffer
+ # trace(__method__, :start)
+
+ keep = @keeps.last
+ # trace(__method__, :done, :empty) if keep.nil?
+ return if keep.nil?
+
+ scanner, start, buffer = keep
+ string = @scanner.string
+ if @scanner != scanner
+ start = 0 # the keep began in an earlier scanner, so all of this one belongs to it
+ end
+ if start == 0 and @scanner.eos?
+ keep_data = string # whole chunk consumed: no slicing needed
+ else
+ keep_data = string.byteslice(start, @scanner.pos - start)
+ end
+ if keep_data
+ if buffer
+ buffer << keep_data
+ else
+ keep[2] = keep_data.dup # dup so later appends do not modify the scanner's own string
+ end
+ end
+
+ # trace(__method__, :done)
+ end
+
+ def read_chunk # replaces @scanner with the next piece of input; returns false when nothing more can be read
+ return false if @last_scanner
+
+ adjust_last_keep # save kept data before the current scanner is thrown away
+
+ input = @inputs.first
+ case input
+ when StringIO
+ string = input.read # a StringIO is consumed in one go
+ raise InvalidEncoding unless string.valid_encoding?
+ # trace(__method__, :stringio, string)
+ @scanner = StringScanner.new(string)
+ @inputs.shift
+ @last_scanner = @inputs.empty?
+ true
+ else
+ chunk = input.gets(@row_separator, @chunk_size) # other inputs are read with gets, bounded by row separator and chunk size
+ if chunk
+ raise InvalidEncoding unless chunk.valid_encoding?
+ # trace(__method__, :chunk, chunk)
+ @scanner = StringScanner.new(chunk)
+ if input.respond_to?(:eof?) and input.eof?
+ @inputs.shift
+ @last_scanner = @inputs.empty?
+ end
+ true
+ else
+ # trace(__method__, :no_chunk)
+ @scanner = StringScanner.new("".encode(@encoding))
+ @inputs.shift # this input is exhausted: move on to the next one, if any
+ @last_scanner = @inputs.empty?
+ if @last_scanner
+ false
+ else
+ read_chunk
+ end
+ end
+ end
+ end
+ end
+
+ def initialize(input, options) # +options+ is read with symbol keys by the prepare_* methods
+ @input = input
+ @options = options
+ @samples = [] # data read ahead while detecting the row separator
+
+ prepare
+ end
+
+ def column_separator
+ @column_separator
+ end
+
+ def row_separator
+ @row_separator
+ end
+
+ def quote_character
+ @quote_character
+ end
+
+ def field_size_limit # max_field_size plus one, or nil when no maximum is set
+ @max_field_size&.succ
+ end
+
+ def max_field_size
+ @max_field_size
+ end
+
+ def skip_lines
+ @skip_lines
+ end
+
+ def unconverted_fields?
+ @unconverted_fields
+ end
+
+ def headers
+ @headers
+ end
+
+ def header_row? # headers are in use but have not been set yet
+ @use_headers and @headers.nil?
+ end
+
+ def return_headers?
+ @return_headers
+ end
+
+ def skip_blanks?
+ @skip_blanks
+ end
+
+ def liberal_parsing?
+ @liberal_parsing
+ end
+
+ def lineno
+ @lineno
+ end
+
+ def line # public name for the private last_line
+ last_line
+ end
+
+ def parse(&block) # yields parsed rows to the block; without a block returns an Enumerator
+ return to_enum(__method__) unless block_given?
+
+ if @return_headers and @headers and @raw_headers # headers supplied through options are yielded first as a Row
+ headers = Row.new(@headers, @raw_headers, true)
+ if @unconverted_fields
+ headers = add_unconverted_fields(headers, [])
+ end
+ yield headers
+ end
+
+ begin
+ @scanner ||= build_scanner # created on first use and reused by later calls
+ if quote_character.nil?
+ parse_no_quote(&block)
+ elsif @need_robust_parsing
+ parse_quotable_robust(&block)
+ else
+ parse_quotable_loose(&block)
+ end
+ rescue InvalidEncoding # internal error class, surfaced to callers as MalformedCSVError
+ if @scanner
+ ignore_broken_line
+ lineno = @lineno
+ else
+ lineno = @lineno + 1 # no scanner yet, i.e. build_scanner itself raised
+ end
+ message = "Invalid byte sequence in #{@encoding}"
+ raise MalformedCSVError.new(message, lineno)
+ rescue UnexpectedError => error # internal inconsistency, also surfaced as MalformedCSVError
+ if @scanner
+ ignore_broken_line
+ lineno = @lineno
+ else
+ lineno = @lineno + 1
+ end
+ message = "This should not be happen: #{error.message}: "
+ message += "Please report this to https://github.com/ruby/csv/issues"
+ raise MalformedCSVError.new(message, lineno)
+ end
+ end
+
+ def use_headers? # set by prepare_header from the :headers option
+ @use_headers
+ end
+
+ private
+ # Runs every preparation step once, from #initialize; later steps read state set by earlier ones
+ def prepare
+ prepare_variable
+ prepare_quote_character
+ prepare_backslash
+ prepare_skip_lines
+ prepare_strip
+ prepare_separators
+ validate_strip_and_col_sep_options
+ prepare_quoted
+ prepare_unquoted
+ prepare_line
+ prepare_header
+ prepare_parser
+ end
+
+ def prepare_variable # copies plain options into instance variables
+ @need_robust_parsing = false # switched on below and by prepare_strip; #parse uses it to pick the parsing routine
+ @encoding = @options[:encoding]
+ liberal_parsing = @options[:liberal_parsing]
+ if liberal_parsing
+ @liberal_parsing = true
+ if liberal_parsing.is_a?(Hash) # a Hash value carries the sub-options read below
+ @double_quote_outside_quote =
+ liberal_parsing[:double_quote_outside_quote]
+ @backslash_quote = liberal_parsing[:backslash_quote]
+ else
+ @double_quote_outside_quote = false
+ @backslash_quote = false
+ end
+ @need_robust_parsing = true
+ else
+ @liberal_parsing = false
+ @backslash_quote = false
+ end
+ @unconverted_fields = @options[:unconverted_fields]
+ @max_field_size = @options[:max_field_size]
+ @skip_blanks = @options[:skip_blanks]
+ @fields_converter = @options[:fields_converter]
+ @header_fields_converter = @options[:header_fields_converter]
+ end
+
+ def prepare_quote_character # raises ArgumentError unless the quote is nil or exactly one character
+ @quote_character = @options[:quote_character]
+ if @quote_character.nil? # quoting disabled
+ @escaped_quote_character = nil
+ @escaped_quote = nil
+ else
+ @quote_character = @quote_character.to_s.encode(@encoding)
+ if @quote_character.length != 1
+ message = ":quote_char has to be nil or a single character String"
+ raise ArgumentError, message
+ end
+ @double_quote_character = @quote_character * 2
+ @escaped_quote_character = Regexp.escape(@quote_character) # pattern source text, reused to build other regexps
+ @escaped_quote = Regexp.new(@escaped_quote_character)
+ end
+ end
+
+ def prepare_backslash # only relevant when liberal_parsing enables :backslash_quote
+ return unless @backslash_quote
+
+ @backslash_character = "\\".encode(@encoding)
+
+ @escaped_backslash_character = Regexp.escape(@backslash_character)
+ @escaped_backslash = Regexp.new(@escaped_backslash_character)
+ if @quote_character.nil?
+ @backslash_quote_character = nil
+ else
+ @backslash_quote_character =
+ @backslash_character + @escaped_quote_character
+ end
+ end
+
+ def prepare_skip_lines # accepts a String, a Regexp, nil, or any object that responds to #match
+ skip_lines = @options[:skip_lines]
+ case skip_lines
+ when String
+ @skip_lines = skip_lines.encode(@encoding) # later compared with include? in skip_line?
+ when Regexp, nil
+ @skip_lines = skip_lines
+ else
+ unless skip_lines.respond_to?(:match)
+ message =
+ ":skip_lines has to respond to \#match: #{skip_lines.inspect}"
+ raise ArgumentError, message
+ end
+ @skip_lines = skip_lines
+ end
+ end
+
+ def prepare_strip # :strip may be a one-character String, another truthy value, or nil/false
+ @strip = @options[:strip]
+ @escaped_strip = nil
+ @strip_value = nil
+ @rstrip_value = nil
+ if @strip.is_a?(String)
+ case @strip.length
+ when 0
+ raise ArgumentError, ":strip must not be an empty String"
+ when 1
+ # ok
+ else
+ raise ArgumentError, ":strip doesn't support 2 or more characters yet"
+ end
+ @strip = @strip.encode(@encoding)
+ @escaped_strip = Regexp.escape(@strip)
+ if @quote_character # the strip regexps are only built when quoting is enabled
+ @strip_value = Regexp.new(@escaped_strip +
+ "+".encode(@encoding))
+ @rstrip_value = Regexp.new(@escaped_strip +
+ "+\\z".encode(@encoding))
+ end
+ @need_robust_parsing = true
+ elsif @strip
+ strip_values = " \t\f\v"
+ @escaped_strip = strip_values.encode(@encoding) # NOTE(review): a raw character list here, not a regexp-escaped pattern as in the String case
+ if @quote_character
+ @strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
+ @rstrip_value = Regexp.new("[#{strip_values}]+\\z".encode(@encoding))
+ end
+ @need_robust_parsing = true
+ end
+ end
+
+ begin # load-time probe: does this StringScanner#scan accept a String pattern?
+ StringScanner.new("x").scan("x")
+ rescue TypeError
+ STRING_SCANNER_SCAN_ACCEPT_STRING = false
+ else
+ STRING_SCANNER_SCAN_ACCEPT_STRING = true
+ end
+
+ def prepare_separators # builds the column/row separator strings and the patterns derived from them
+ column_separator = @options[:column_separator]
+ @column_separator = column_separator.to_s.encode(@encoding)
+ if @column_separator.size < 1
+ message = ":col_sep must be 1 or more characters: "
+ message += column_separator.inspect
+ raise ArgumentError, message
+ end
+ @row_separator =
+ resolve_row_separator(@options[:row_separator]).encode(@encoding)
+
+ @escaped_column_separator = Regexp.escape(@column_separator)
+ @escaped_first_column_separator = Regexp.escape(@column_separator[0])
+ if @column_separator.size > 1
+ @column_end = Regexp.new(@escaped_column_separator)
+ @column_ends = @column_separator.each_char.collect do |char| # one pattern per separator character
+ Regexp.new(Regexp.escape(char))
+ end
+ @first_column_separators = Regexp.new(@escaped_first_column_separator +
+ "+".encode(@encoding))
+ else
+ if STRING_SCANNER_SCAN_ACCEPT_STRING
+ @column_end = @column_separator # plain String pattern when the scanner accepts one
+ else
+ @column_end = Regexp.new(@escaped_column_separator)
+ end
+ @column_ends = nil
+ @first_column_separators = nil
+ end
+
+ escaped_row_separator = Regexp.escape(@row_separator)
+ @row_end = Regexp.new(escaped_row_separator)
+ if @row_separator.size > 1
+ @row_ends = @row_separator.each_char.collect do |char|
+ Regexp.new(Regexp.escape(char))
+ end
+ else
+ @row_ends = nil
+ end
+
+ @cr = "\r".encode(@encoding)
+ @lf = "\n".encode(@encoding)
+ @line_end = Regexp.new("\r\n|\n|\r".encode(@encoding)) # any newline form, independent of the configured row separator
+ @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
+ end
+
+ # This method verifies that there are no (obvious) ambiguities with the
+ # provided +col_sep+ and +strip+ parsing options. For example, if +col_sep+
+ # and +strip+ were both equal to +\t+, then there would be no clear way to
+ # parse the input. Raises ArgumentError when col_sep starts or ends with a strip character.
+ def validate_strip_and_col_sep_options
+ return unless @strip
+
+ if @strip.is_a?(String)
+ if @column_separator.start_with?(@strip) || @column_separator.end_with?(@strip)
+ raise ArgumentError,
+ "The provided strip (#{@escaped_strip}) and " \
+ "col_sep (#{@escaped_column_separator}) options are incompatible."
+ end
+ else
+ if Regexp.new("\\A[#{@escaped_strip}]|[#{@escaped_strip}]\\z").match?(@column_separator)
+ raise ArgumentError,
+ "The provided strip (true) and " \
+ "col_sep (#{@escaped_column_separator}) options are incompatible."
+ end
+ end
+ end
+
+ def prepare_quoted # builds the patterns for quoted values and the separator used by String#split
+ if @quote_character
+ @quotes = Regexp.new(@escaped_quote_character +
+ "+".encode(@encoding))
+ no_quoted_values = @escaped_quote_character.dup # characters that end a run of plain text inside quotes
+ if @backslash_quote
+ no_quoted_values << @escaped_backslash_character
+ end
+ @quoted_value = Regexp.new("[^".encode(@encoding) +
+ no_quoted_values +
+ "]+".encode(@encoding))
+ end
+ if @escaped_strip # NOTE(review): for strip: true this is the raw list " \t\f\v", so each "*" below binds only to its last character - confirm intended
+ @split_column_separator = Regexp.new(@escaped_strip +
+ "*".encode(@encoding) +
+ @escaped_column_separator +
+ @escaped_strip +
+ "*".encode(@encoding))
+ else
+ if @column_separator == " ".encode(@encoding)
+ @split_column_separator = Regexp.new(@escaped_column_separator) # String#split treats a " " String specially, so a Regexp is used
+ else
+ @split_column_separator = @column_separator
+ end
+ end
+ end
+
+ def prepare_unquoted # builds @unquoted_value: a run of characters allowed in an unquoted field
+ return if @quote_character.nil?
+
+ no_unquoted_values = "\r\n".encode(@encoding) # excluded: CR, LF, the separator's first character...
+ no_unquoted_values << @escaped_first_column_separator
+ unless @liberal_parsing
+ no_unquoted_values << @escaped_quote_character # ...and the quote character, unless liberal parsing is on
+ end
+ @unquoted_value = Regexp.new("[^".encode(@encoding) +
+ no_unquoted_values +
+ "]+".encode(@encoding))
+ end
+
+ def resolve_row_separator(separator) # returns the separator as a String in @encoding, detecting it from the input for :auto
+ if separator == :auto
+ cr = "\r".encode(@encoding)
+ lf = "\n".encode(@encoding)
+ if @input.is_a?(StringIO)
+ pos = @input.pos
+ separator = detect_row_separator(@input.read, cr, lf)
+ @input.seek(pos) # restore the position: the data is read again for parsing
+ elsif @input.respond_to?(:gets)
+ if @input.is_a?(File)
+ chunk_size = 32 * 1024
+ else
+ chunk_size = 1024
+ end
+ begin
+ while separator == :auto
+ #
+ # if we run out of data, it's probably a single line
+ # (the fallback after this block will set the default value)
+ #
+ break unless sample = @input.gets(nil, chunk_size)
+
+ # extend sample if we're unsure of the line ending
+ if sample.end_with?(cr)
+ sample << (@input.gets(nil, 1) || "")
+ end
+
+ @samples << sample # kept so the consumed data can still be parsed later
+
+ separator = detect_row_separator(sample, cr, lf)
+ end
+ rescue IOError
+ # do nothing: the fallback after this block will set the default
+ end
+ end
+ separator = InputRecordSeparator.value if separator == :auto
+ end
+ separator.to_s.encode(@encoding)
+ end
+
+ def detect_row_separator(sample, cr, lf) # returns cr + lf, cr, lf, or :auto when +sample+ contains no line ending
+ lf_index = sample.index(lf)
+ if lf_index
+ cr_index = sample[0, lf_index].index(cr) # a CR only counts when it comes before the first LF
+ else
+ cr_index = sample.index(cr)
+ end
+ if cr_index and lf_index
+ if cr_index + 1 == lf_index
+ cr + lf
+ elsif cr_index < lf_index
+ cr
+ else # NOTE(review): appears unreachable, since cr was searched only before lf_index
+ lf
+ end
+ elsif cr_index
+ cr
+ elsif lf_index
+ lf
+ else
+ :auto
+ end
+ end
+
+ def prepare_line # resets the line bookkeeping
+ @lineno = 0
+ @last_line = nil
+ @scanner = nil # built lazily by #parse
+ end
+
+ def last_line # backs the public #line
+ if @scanner
+ @last_line ||= @scanner.keep_end # not cached yet: take the text kept by the scanner
+ else
+ @last_line
+ end
+ end
+
+ def prepare_header # interprets the :headers option
+ @return_headers = @options[:return_headers]
+
+ headers = @options[:headers]
+ case headers
+ when Array # header names given directly
+ @raw_headers = headers
+ quoted_fields = [false] * @raw_headers.size
+ @use_headers = true
+ when String # header names given as one CSV line
+ @raw_headers, quoted_fields = parse_headers(headers)
+ @use_headers = true
+ when nil, false
+ @raw_headers = nil
+ @use_headers = false
+ else # any other value: headers in use but not known yet (presumably taken from the data later)
+ @raw_headers = nil
+ @use_headers = true
+ end
+ if @raw_headers
+ @headers = adjust_headers(@raw_headers, quoted_fields)
+ else
+ @headers = nil
+ end
+ end
+
+ def parse_headers(row) # returns [header fields, quoted flag per field] for a header line given as a String
+ quoted_fields = []
+ converter = lambda do |field, info| # pass-through converter that only records whether each field was quoted
+ quoted_fields << info.quoted?
+ field
+ end
+ headers = CSV.parse_line(row,
+ col_sep: @column_separator,
+ row_sep: @row_separator,
+ quote_char: @quote_character,
+ converters: [converter])
+ [headers, quoted_fields]
+ end
+
+ def adjust_headers(headers, quoted_fields) # runs the header converters, then freezes every String header
+ adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno, quoted_fields)
+ adjusted_headers.each {|h| h.freeze if h.is_a? String}
+ adjusted_headers
+ end
+
+ def prepare_parser
+ @may_quoted = may_quoted? # decides which kind of value parse_column_value tries first
+ end
+
+ def may_quoted? # truthy when the quote character occurs within the first 128 characters of the sample
+ return false if @quote_character.nil?
+
+ if @input.is_a?(StringIO)
+ pos = @input.pos
+ sample = @input.read
+ @input.seek(pos) # peek only: restore the read position
+ else
+ return false if @samples.empty?
+ sample = @samples.first
+ end
+ sample[0, 128].index(@quote_character) # index or nil, used as a boolean
+ end
+
+ class UnoptimizedStringIO # :nodoc:
+ def initialize(string) # wraps a StringIO; being a different class, it does not match "when StringIO" in read_chunk
+ @io = StringIO.new(string, "rb:#{string.encoding}")
+ end
+
+ def gets(*args)
+ @io.gets(*args)
+ end
+
+ def each_line(*args, &block)
+ @io.each_line(*args, &block)
+ end
+
+ def eof?
+ @io.eof?
+ end
+ end
+
+ SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes") # evaluated once at load time
+ if SCANNER_TEST
+ SCANNER_TEST_CHUNK_SIZE_NAME = "CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"
+ SCANNER_TEST_CHUNK_SIZE_VALUE = ENV[SCANNER_TEST_CHUNK_SIZE_NAME]
+ def build_scanner # test variant: always an InputsScanner, chunk size taken from the environment (default 1)
+ inputs = @samples.collect do |sample|
+ UnoptimizedStringIO.new(sample)
+ end
+ if @input.is_a?(StringIO)
+ inputs << UnoptimizedStringIO.new(@input.read)
+ else
+ inputs << @input
+ end
+ begin
+ chunk_size_value = ENV[SCANNER_TEST_CHUNK_SIZE_NAME]
+ rescue # Ractor::IsolationError
+ # Ractor on Ruby 3.0 can't read ENV value.
+ chunk_size_value = SCANNER_TEST_CHUNK_SIZE_VALUE
+ end
+ chunk_size = Integer((chunk_size_value || "1"), 10)
+ InputsScanner.new(inputs,
+ @encoding,
+ @row_separator,
+ chunk_size: chunk_size)
+ end
+ else
+ def build_scanner # normal variant: a Scanner when all data is available as one String, else an InputsScanner
+ string = nil
+ if @samples.empty? and @input.is_a?(StringIO)
+ string = @input.read
+ elsif @samples.size == 1 and
+ @input != ARGF and
+ @input.respond_to?(:eof?) and
+ @input.eof?
+ string = @samples[0] # the single sample already holds the whole input
+ end
+ if string
+ unless string.valid_encoding?
+ index = string.lines(@row_separator).index do |line| # locate the first bad line for the error's line number
+ !line.valid_encoding?
+ end
+ if index
+ message = "Invalid byte sequence in #{@encoding}"
+ raise MalformedCSVError.new(message, @lineno + index + 1)
+ end
+ end
+ Scanner.new(string)
+ else
+ inputs = @samples.collect do |sample| # data already read as samples is scanned first
+ StringIO.new(sample)
+ end
+ inputs << @input
+ InputsScanner.new(inputs, @encoding, @row_separator)
+ end
+ end
+ end
+
+ def skip_needless_lines # consumes leading lines that match :skip_lines; stops at the first line that does not
+ return unless @skip_lines
+
+ until @scanner.eos?
+ @scanner.keep_start
+ line = @scanner.scan_all(@not_line_end) || "".encode(@encoding)
+ line << @row_separator if parse_row_end
+ if skip_line?(line)
+ @lineno += 1
+ @scanner.keep_drop
+ else
+ @scanner.keep_back # not skippable: rewind so the line is parsed normally
+ return
+ end
+ end
+ end
+
+ def skip_line?(line) # truthy when +line+, without its row separator, matches :skip_lines
+ line = line.delete_suffix(@row_separator)
+ case @skip_lines
+ when String
+ line.include?(@skip_lines) # a String matches as a substring
+ when Regexp
+ @skip_lines.match?(line)
+ else
+ @skip_lines.match(line) # any other object only has to respond to #match
+ end
+ end
+
+ def validate_field_size(field) # raises MalformedCSVError when +field+ is longer than :max_field_size
+ return unless @max_field_size
+ return if field.size <= @max_field_size
+ ignore_broken_line
+ message = "Field size exceeded: #{field.size} > #{@max_field_size}"
+ raise MalformedCSVError.new(message, @lineno)
+ end
+
+ def parse_no_quote(&block) # used by #parse when quote_character is nil: every line is simply split on the separator
+ @scanner.each_line(@row_separator) do |line|
+ next if @skip_lines and skip_line?(line)
+ original_line = line
+ line = line.delete_suffix(@row_separator)
+
+ if line.empty?
+ next if @skip_blanks
+ row = []
+ quoted_fields = []
+ else
+ line = strip_value(line)
+ row = line.split(@split_column_separator, -1) # -1 keeps trailing empty fields
+ quoted_fields = [false] * row.size
+ if @max_field_size
+ row.each do |column|
+ validate_field_size(column)
+ end
+ end
+ n_columns = row.size
+ i = 0
+ while i < n_columns
+ row[i] = nil if row[i].empty? # empty fields become nil
+ i += 1
+ end
+ end
+ @last_line = original_line
+ emit_row(row, quoted_fields, &block)
+ end
+ end
+
+ def parse_quotable_loose(&block) # split-based path; hands over to parse_quotable_robust when a line is not simple
+ @scanner.keep_start
+ @scanner.each_line(@row_separator) do |line|
+ if @skip_lines and skip_line?(line)
+ @scanner.keep_drop
+ @scanner.keep_start
+ next
+ end
+ original_line = line
+ line = line.delete_suffix(@row_separator)
+
+ if line.empty?
+ if @skip_blanks
+ @scanner.keep_drop
+ @scanner.keep_start
+ next
+ end
+ row = []
+ quoted_fields = []
+ elsif line.include?(@cr) or line.include?(@lf) # a CR or LF inside the line: rewind and use the robust parser
+ @scanner.keep_back
+ @need_robust_parsing = true
+ return parse_quotable_robust(&block)
+ else
+ row = line.split(@split_column_separator, -1)
+ quoted_fields = []
+ n_columns = row.size
+ i = 0
+ while i < n_columns
+ column = row[i]
+ if column.empty?
+ quoted_fields << false
+ row[i] = nil
+ else
+ n_quotes = column.count(@quote_character)
+ if n_quotes.zero?
+ quoted_fields << false
+ # no quote
+ elsif n_quotes == 2 and
+ column.start_with?(@quote_character) and
+ column.end_with?(@quote_character)
+ quoted_fields << true
+ row[i] = column[1..-2] # exactly one pair of surrounding quotes: drop them
+ else
+ @scanner.keep_back # any other quote layout: rewind and use the robust parser
+ @need_robust_parsing = true
+ return parse_quotable_robust(&block)
+ end
+ validate_field_size(row[i])
+ end
+ i += 1
+ end
+ end
+ @scanner.keep_drop
+ @scanner.keep_start
+ @last_line = original_line
+ emit_row(row, quoted_fields, &block)
+ end
+ @scanner.keep_drop
+ end
+
+ def parse_quotable_robust(&block) # scanner-driven parser: reads one column value at a time and emits a row at each row end
+ row = []
+ quoted_fields = []
+ skip_needless_lines
+ start_row
+ while true
+ @quoted_column_value = false
+ @unquoted_column_value = false
+ @scanner.scan_all(@strip_value) if @strip_value
+ value = parse_column_value
+ if value
+ @scanner.scan_all(@strip_value) if @strip_value
+ validate_field_size(value)
+ end
+ if parse_column_end
+ row << value
+ quoted_fields << @quoted_column_value
+ elsif parse_row_end
+ if row.empty? and value.nil? # blank line
+ emit_row([], [], &block) unless @skip_blanks
+ else
+ row << value
+ quoted_fields << @quoted_column_value
+ emit_row(row, quoted_fields, &block)
+ row = []
+ quoted_fields = []
+ end
+ skip_needless_lines
+ start_row
+ elsif @scanner.eos? # end of data without a final row separator
+ break if row.empty? and value.nil?
+ row << value
+ quoted_fields << @quoted_column_value
+ emit_row(row, quoted_fields, &block)
+ break
+ else # neither a column end, a row end nor end of data: malformed input, every branch below raises
+ if @quoted_column_value
+ if liberal_parsing? and (new_line = @scanner.check(@line_end))
+ message =
+ "Illegal end-of-line sequence outside of a quoted field " +
+ "<#{new_line.inspect}>"
+ else
+ message = "Any value after quoted field isn't allowed"
+ end
+ ignore_broken_line
+ raise MalformedCSVError.new(message, @lineno)
+ elsif @unquoted_column_value and
+ (new_line = @scanner.scan(@line_end))
+ ignore_broken_line
+ message = "Unquoted fields do not allow new line " +
+ "<#{new_line.inspect}>"
+ raise MalformedCSVError.new(message, @lineno)
+ elsif @scanner.rest.start_with?(@quote_character)
+ ignore_broken_line
+ message = "Illegal quoting"
+ raise MalformedCSVError.new(message, @lineno)
+ elsif (new_line = @scanner.scan(@line_end))
+ ignore_broken_line
+ message = "New line must be <#{@row_separator.inspect}> " +
+ "not <#{new_line.inspect}>"
+ raise MalformedCSVError.new(message, @lineno)
+ else
+ ignore_broken_line
+ raise MalformedCSVError.new("TODO: Meaningful message",
+ @lineno)
+ end
+ end
+ end
+ end
+
+ def parse_column_value
+ if @liberal_parsing
+ quoted_value = parse_quoted_column_value
+ if quoted_value
+ @scanner.scan_all(@strip_value) if @strip_value
+ unquoted_value = parse_unquoted_column_value
+ if unquoted_value
+ if @double_quote_outside_quote
+ unquoted_value = unquoted_value.gsub(@quote_character * 2,
+ @quote_character)
+ if quoted_value.empty? # %Q{""...} case
+ return @quote_character + unquoted_value
+ end
+ end
+ @quote_character + quoted_value + @quote_character + unquoted_value
+ else
+ quoted_value
+ end
+ else
+ parse_unquoted_column_value
+ end
+ elsif @may_quoted
+ parse_quoted_column_value ||
+ parse_unquoted_column_value
+ else
+ parse_unquoted_column_value ||
+ parse_quoted_column_value
+ end
+ end
+
+ def parse_unquoted_column_value
+ value = @scanner.scan_all(@unquoted_value)
+ return nil unless value
+
+ @unquoted_column_value = true
+ if @first_column_separators
+ while true
+ @scanner.keep_start
+ is_column_end = @column_ends.all? do |column_end|
+ @scanner.scan(column_end)
+ end
+ @scanner.keep_back
+ break if is_column_end
+ sub_separator = @scanner.scan_all(@first_column_separators)
+ break if sub_separator.nil?
+ value << sub_separator
+ sub_value = @scanner.scan_all(@unquoted_value)
+ break if sub_value.nil?
+ value << sub_value
+ end
+ end
+ value.gsub!(@backslash_quote_character, @quote_character) if @backslash_quote
+ if @rstrip_value
+ value.gsub!(@rstrip_value, "")
+ end
+ value
+ end
+
+ def parse_quoted_column_value
+ quotes = @scanner.scan_all(@quotes)
+ return nil unless quotes
+
+ @quoted_column_value = true
+ n_quotes = quotes.size
+ if (n_quotes % 2).zero?
+ quotes[0, (n_quotes - 2) / 2]
+ else
+ value = quotes[0, n_quotes / 2]
+ while true
+ quoted_value = @scanner.scan_all(@quoted_value)
+ value << quoted_value if quoted_value
+ if @backslash_quote
+ if @scanner.scan(@escaped_backslash)
+ if @scanner.scan(@escaped_quote)
+ value << @quote_character
+ else
+ value << @backslash_character
+ end
+ next
+ end
+ end
+
+ quotes = @scanner.scan_all(@quotes)
+ unless quotes
+ ignore_broken_line
+ message = "Unclosed quoted field"
+ raise MalformedCSVError.new(message, @lineno)
+ end
+ n_quotes = quotes.size
+ if n_quotes == 1
+ break
+ else
+ value << quotes[0, n_quotes / 2]
+ break if (n_quotes % 2) == 1
+ end
+ end
+ value
+ end
+ end
+
+ def parse_column_end
+ return true if @scanner.scan(@column_end)
+ return false unless @column_ends
+
+ @scanner.keep_start
+ if @column_ends.all? {|column_end| @scanner.scan(column_end)}
+ @scanner.keep_drop
+ true
+ else
+ @scanner.keep_back
+ false
+ end
+ end
+
+ def parse_row_end
+ return true if @scanner.scan(@row_end)
+ return false unless @row_ends
+ @scanner.keep_start
+ if @row_ends.all? {|row_end| @scanner.scan(row_end)}
+ @scanner.keep_drop
+ true
+ else
+ @scanner.keep_back
+ false
+ end
+ end
+
+ def strip_value(value)
+ return value unless @strip
+ return value if value.nil?
+
+ case @strip
+ when String
+ while value.delete_prefix!(@strip)
+ # do nothing
+ end
+ while value.delete_suffix!(@strip)
+ # do nothing
+ end
+ else
+ value.strip!
+ end
+ value
+ end
+
+ def ignore_broken_line
+ @scanner.scan_all(@not_line_end)
+ @scanner.scan_all(@line_end)
+ @lineno += 1
+ end
+
+ def start_row
+ if @last_line
+ @last_line = nil
+ else
+ @scanner.keep_drop
+ end
+ @scanner.keep_start
+ end
+
+ def emit_row(row, quoted_fields, &block)
+ @lineno += 1
+
+ raw_row = row
+ if @use_headers
+ if @headers.nil?
+ @headers = adjust_headers(row, quoted_fields)
+ return unless @return_headers
+ row = Row.new(@headers, row, true)
+ else
+ row = Row.new(@headers,
+ @fields_converter.convert(raw_row, @headers, @lineno, quoted_fields))
+ end
+ else
+ # convert fields, if needed...
+ row = @fields_converter.convert(raw_row, nil, @lineno, quoted_fields)
+ end
+
+ # inject unconverted fields and accessor, if requested...
+ if @unconverted_fields and not row.respond_to?(:unconverted_fields)
+ add_unconverted_fields(row, raw_row)
+ end
+
+ yield(row)
+ end
+
+ # This method injects an instance variable <tt>unconverted_fields</tt> into
+ # +row+ and an accessor method for +row+ called unconverted_fields(). The
+ # variable is set to the contents of +fields+.
+ def add_unconverted_fields(row, fields)
+ class << row
+ attr_reader :unconverted_fields
+ end
+ row.instance_variable_set(:@unconverted_fields, fields)
+ row
+ end
+ end
+end
diff --git a/lib/csv/row.rb b/lib/csv/row.rb
new file mode 100644
index 0000000000..500adb1882
--- /dev/null
+++ b/lib/csv/row.rb
@@ -0,0 +1,757 @@
+# frozen_string_literal: true
+
+require "forwardable"
+
+class CSV
+ # = \CSV::Row
+ # A \CSV::Row instance represents a \CSV table row.
+ # (see {class CSV}[../CSV.html]).
+ #
+ # The instance may have:
+ # - Fields: each is an object, not necessarily a \String.
+ # - Headers: each serves as a key, and also need not be a \String.
+ #
+ # === Instance Methods
+ #
+ # \CSV::Row has three groups of instance methods:
+ # - Its own internally defined instance methods.
+ # - Methods included by module Enumerable.
+ # - Methods delegated to class Array:
+ # * Array#empty?
+ # * Array#length
+ # * Array#size
+ #
+ # == Creating a \CSV::Row Instance
+ #
+ # Commonly, a new \CSV::Row instance is created by parsing \CSV source
+ # that has headers:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.each {|row| p row }
+ # Output:
+ # #<CSV::Row "Name":"foo" "Value":"0">
+ # #<CSV::Row "Name":"bar" "Value":"1">
+ # #<CSV::Row "Name":"baz" "Value":"2">
+ #
+ # You can also create a row directly. See ::new.
+ #
+ # == Headers
+ #
+ # Like a \CSV::Table, a \CSV::Row has headers.
+ #
+ # A \CSV::Row that was created by parsing \CSV source
+ # inherits its headers from the table:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table.first
+ # row.headers # => ["Name", "Value"]
+ #
+ # You can also create a new row with headers;
+ # like the keys in a \Hash, the headers need not be Strings:
+ # row = CSV::Row.new([:name, :value], ['foo', 0])
+ # row.headers # => [:name, :value]
+ #
+ # The new row retains its headers even if added to a table
+ # that has headers:
+ # table << row # => #<CSV::Table mode:col_or_row row_count:5>
+ # row.headers # => [:name, :value]
+ # row[:name] # => "foo"
+ # row['Name'] # => nil
+ #
+ #
+ #
+ # == Accessing Fields
+ #
+ # You may access a field in a \CSV::Row with either its \Integer index
+ # (\Array-style) or its header (\Hash-style).
+ #
+ # Fetch a field using method #[]:
+ # row = CSV::Row.new(['Name', 'Value'], ['foo', 0])
+ # row[1] # => 0
+ # row['Value'] # => 0
+ #
+ # Set a field using method #[]=:
+ # row = CSV::Row.new(['Name', 'Value'], ['foo', 0])
+ # row # => #<CSV::Row "Name":"foo" "Value":0>
+ # row[0] = 'bar'
+ # row['Value'] = 1
+ # row # => #<CSV::Row "Name":"bar" "Value":1>
+ #
+ class Row
+ # :call-seq:
+ # CSV::Row.new(headers, fields, header_row = false) -> csv_row
+ #
+ # Returns the new \CSV::Row instance constructed from
+ # arguments +headers+ and +fields+; both should be Arrays;
+ # note that the fields need not be Strings:
+ # row = CSV::Row.new(['Name', 'Value'], ['foo', 0])
+ # row # => #<CSV::Row "Name":"foo" "Value":0>
+ #
+ # If the \Array lengths are different, the shorter is +nil+-filled:
+ # row = CSV::Row.new(['Name', 'Value', 'Date', 'Size'], ['foo', 0])
+ # row # => #<CSV::Row "Name":"foo" "Value":0 "Date":nil "Size":nil>
+ #
+ # Each \CSV::Row object is either a <i>field row</i> or a <i>header row</i>;
+ # by default, a new row is a field row; for the row created above:
+ # row.field_row? # => true
+ # row.header_row? # => false
+ #
+ # If the optional argument +header_row+ is given as +true+,
+ # the created row is a header row:
+ # row = CSV::Row.new(['Name', 'Value'], ['foo', 0], header_row = true)
+ # row # => #<CSV::Row "Name":"foo" "Value":0>
+ # row.field_row? # => false
+ # row.header_row? # => true
+ def initialize(headers, fields, header_row = false)
+ @header_row = header_row
+ headers.each { |h| h.freeze if h.is_a? String }
+
+ # handle extra headers or fields
+ @row = if headers.size >= fields.size
+ headers.zip(fields)
+ else
+ fields.zip(headers).each(&:reverse!)
+ end
+ end
+
+ # Internal data format used to compare equality.
+ attr_reader :row
+ protected :row
+
+ ### Array Delegation ###
+
+ extend Forwardable
+ def_delegators :@row, :empty?, :length, :size
+
+ # :call-seq:
+ # row.initialize_copy(other_row) -> self
+ #
+ # Calls superclass method.
+ def initialize_copy(other)
+ super_return_value = super
+ @row = @row.collect(&:dup)
+ super_return_value
+ end
+
+ # :call-seq:
+ # row.header_row? -> true or false
+ #
+ # Returns +true+ if this is a header row, +false+ otherwise.
+ def header_row?
+ @header_row
+ end
+
+ # :call-seq:
+ # row.field_row? -> true or false
+ #
+ # Returns +true+ if this is a field row, +false+ otherwise.
+ def field_row?
+ not header_row?
+ end
+
+ # :call-seq:
+ # row.headers -> array_of_headers
+ #
+ # Returns the headers for this row:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table.first
+ # row.headers # => ["Name", "Value"]
+ def headers
+ @row.map(&:first)
+ end
+
+ # :call-seq:
+ # field(index) -> value
+ # field(header) -> value
+ # field(header, offset) -> value
+ #
+ # Returns the field value for the given +index+ or +header+.
+ #
+ # ---
+ #
+ # Fetch field value by \Integer index:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.field(0) # => "foo"
+ # row.field(1) # => "0"
+ #
+ # Counts backward from the last column if +index+ is negative:
+ # row.field(-1) # => "0"
+ # row.field(-2) # => "foo"
+ #
+ # Returns +nil+ if +index+ is out of range:
+ # row.field(2) # => nil
+ # row.field(-3) # => nil
+ #
+ # ---
+ #
+ # Fetch field value by header (first found):
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.field('Name') # => "Foo"
+ #
+ # Fetch field value by header, ignoring +offset+ leading fields:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.field('Name', 2) # => "Baz"
+ #
+ # Returns +nil+ if the header does not exist.
+ def field(header_or_index, minimum_index = 0)
+ # locate the pair
+ finder = (header_or_index.is_a?(Integer) || header_or_index.is_a?(Range)) ? :[] : :assoc
+ pair = @row[minimum_index..-1].public_send(finder, header_or_index)
+
+ # return the field if we have a pair
+ if pair.nil?
+ nil
+ else
+ header_or_index.is_a?(Range) ? pair.map(&:last) : pair.last
+ end
+ end
+ alias_method :[], :field
+
+ #
+ # :call-seq:
+ # fetch(header) -> value
+ # fetch(header, default) -> value
+ # fetch(header) {|row| ... } -> value
+ #
+ # Returns the field value as specified by +header+.
+ #
+ # ---
+ #
+ # With the single argument +header+, returns the field value
+ # for that header (first found):
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.fetch('Name') # => "Foo"
+ #
+ # Raises exception +KeyError+ if the header does not exist.
+ #
+ # ---
+ #
+ # With arguments +header+ and +default+ given,
+ # returns the field value for the header (first found)
+ # if the header exists, otherwise returns +default+:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.fetch('Name', '') # => "Foo"
+ # row.fetch(:nosuch, '') # => ""
+ #
+ # ---
+ #
+ # With argument +header+ and a block given,
+ # returns the field value for the header (first found)
+ # if the header exists; otherwise calls the block
+ # and returns its return value:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.fetch('Name') {|header| fail 'Cannot happen' } # => "Foo"
+ # row.fetch(:nosuch) {|header| "Header '#{header}' not found" } # => "Header 'nosuch' not found"
+ def fetch(header, *varargs)
+ raise ArgumentError, "Too many arguments" if varargs.length > 1
+ pair = @row.assoc(header)
+ if pair
+ pair.last
+ else
+ if block_given?
+ yield header
+ elsif varargs.empty?
+ raise KeyError, "key not found: #{header}"
+ else
+ varargs.first
+ end
+ end
+ end
+
+ # :call-seq:
+ # row.has_key?(header) -> true or false
+ #
+ # Returns +true+ if there is a field with the given +header+,
+ # +false+ otherwise.
+ def has_key?(header)
+ !!@row.assoc(header)
+ end
+ alias_method :include?, :has_key?
+ alias_method :key?, :has_key?
+ alias_method :member?, :has_key?
+ alias_method :header?, :has_key?
+
+ #
+ # :call-seq:
+ # row[index] = value -> value
+ # row[header, offset] = value -> value
+ # row[header] = value -> value
+ #
+ # Assigns the field value for the given +index+ or +header+;
+ # returns +value+.
+ #
+ # ---
+ #
+ # Assign field value by \Integer index:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row[0] = 'Bat'
+ # row[1] = 3
+ # row # => #<CSV::Row "Name":"Bat" "Value":3>
+ #
+ # Counts backward from the last column if +index+ is negative:
+ # row[-1] = 4
+ # row[-2] = 'Bam'
+ # row # => #<CSV::Row "Name":"Bam" "Value":4>
+ #
+ # Extends the row with <tt>nil:nil</tt> if positive +index+ is not in the row:
+ # row[4] = 5
+ # row # => #<CSV::Row "Name":"Bam" "Value":4 nil:nil nil:nil nil:5>
+ #
+ # Raises IndexError if negative +index+ is too small (too far from zero).
+ #
+ # ---
+ #
+ # Assign field value by header (first found):
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row['Name'] = 'Bat'
+ # row # => #<CSV::Row "Name":"Bat" "Name":"Bar" "Name":"Baz">
+ #
+ # Assign field value by header, ignoring +offset+ leading fields:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row['Name', 2] = 4
+ # row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":4>
+ #
+ # Append new field by (new) header:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row['New'] = 6
+ # row # => #<CSV::Row "Name":"foo" "Value":"0" "New":6>
+ def []=(*args)
+ value = args.pop
+
+ if args.first.is_a? Integer
+ if @row[args.first].nil? # extending past the end with index
+ @row[args.first] = [nil, value]
+ @row.map! { |pair| pair.nil? ? [nil, nil] : pair }
+ else # normal index assignment
+ @row[args.first][1] = value
+ end
+ else
+ index = index(*args)
+ if index.nil? # appending a field
+ self << [args.first, value]
+ else # normal header assignment
+ @row[index][1] = value
+ end
+ end
+ end
+
+ #
+ # :call-seq:
+ # row << [header, value] -> self
+ # row << hash -> self
+ # row << value -> self
+ #
+ # Adds a field to +self+; returns +self+:
+ #
+ # If the argument is a 2-element \Array <tt>[header, value]</tt>,
+ # a field is added with the given +header+ and +value+:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row << ['NAME', 'Bat']
+ # row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz" "NAME":"Bat">
+ #
+ # If the argument is a \Hash, each <tt>key-value</tt> pair is added
+ # as a field with header +key+ and value +value+.
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row << {NAME: 'Bat', name: 'Bam'}
+ # row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz" NAME:"Bat" name:"Bam">
+ #
+ # Otherwise, the given +value+ is added as a field with no header.
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row << 'Bag'
+ # row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz" nil:"Bag">
+ def <<(arg)
+ if arg.is_a?(Array) and arg.size == 2 # appending a header and name
+ @row << arg
+ elsif arg.is_a?(Hash) # append header and name pairs
+ arg.each { |pair| @row << pair }
+ else # append field value
+ @row << [nil, arg]
+ end
+
+ self # for chaining
+ end
+
+ # :call-seq:
+ # row.push(*values) -> self
+ #
+ # Appends each of the given +values+ to +self+ as a field; returns +self+:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.push('Bat', 'Bam')
+ # row # => #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz" nil:"Bat" nil:"Bam">
+ def push(*args)
+ args.each { |arg| self << arg }
+
+ self # for chaining
+ end
+
+ #
+ # :call-seq:
+ # delete(index) -> [header, value] or nil
+ # delete(header) -> [header, value] or empty_array
+ # delete(header, offset) -> [header, value] or empty_array
+ #
+ # Removes a specified field from +self+; returns the 2-element \Array
+ # <tt>[header, value]</tt> if the field exists.
+ #
+ # If an \Integer argument +index+ is given,
+ # removes and returns the field at offset +index+,
+ # or returns +nil+ if the field does not exist:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.delete(1) # => ["Name", "Bar"]
+ # row.delete(50) # => nil
+ #
+ # Otherwise, if the single argument +header+ is given,
+ # removes and returns the first-found field with the given header,
+ # or returns a new empty \Array if the field does not exist:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.delete('Name') # => ["Name", "Foo"]
+ # row.delete('NAME') # => []
+ #
+ # If argument +header+ and \Integer argument +offset+ are given,
+ # removes and returns the first-found field with the given header
+ # whose +index+ is at least as large as +offset+:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.delete('Name', 1) # => ["Name", "Bar"]
+ # row.delete('NAME', 1) # => []
+ def delete(header_or_index, minimum_index = 0)
+ if header_or_index.is_a? Integer # by index
+ @row.delete_at(header_or_index)
+ elsif i = index(header_or_index, minimum_index) # by header
+ @row.delete_at(i)
+ else
+ [ ]
+ end
+ end
+
+ # :call-seq:
+ # row.delete_if {|header, value| ... } -> self
+ #
+ # Removes fields from +self+ as selected by the block; returns +self+.
+ #
+ # Removes each field for which the block returns a truthy value:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.delete_if {|header, value| value.start_with?('B') } # => #<CSV::Row "Name":"Foo">
+ # row # => #<CSV::Row "Name":"Foo">
+ # row.delete_if {|header, value| header.start_with?('B') } # => #<CSV::Row "Name":"Foo">
+ #
+ # If no block is given, returns a new Enumerator:
+ # row.delete_if # => #<Enumerator: #<CSV::Row "Name":"Foo">:delete_if>
+ def delete_if(&block)
+ return enum_for(__method__) { size } unless block_given?
+
+ @row.delete_if(&block)
+
+ self # for chaining
+ end
+
+ # :call-seq:
+ # self.fields(*specifiers) -> array_of_fields
+ #
+ # Returns field values per the given +specifiers+, which may be any mixture of:
+ # - \Integer index.
+ # - \Range of \Integer indexes.
+ # - 2-element \Array containing a header and offset.
+ # - Header.
+ # - \Range of headers.
+ #
+ # For +specifier+ in one of the first four cases above,
+ # returns the result of <tt>self.field(specifier)</tt>; see #field.
+ #
+ # Although there may be any number of +specifiers+,
+ # the examples here will illustrate one at a time.
+ #
+ # When the specifier is an \Integer +index+,
+ # returns <tt>self.field(index)</tt>:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.fields(1) # => ["Bar"]
+ #
+ # When the specifier is a \Range of \Integers +range+,
+ # returns <tt>self.field(range)</tt>:
+ # row.fields(1..2) # => ["Bar", "Baz"]
+ #
+ # When the specifier is a 2-element \Array +array+,
+ # returns <tt>self.field(*array)</tt>:
+ # row.fields(['Name', 1]) # => ["Bar"]
+ #
+ # When the specifier is a header +header+,
+ # returns <tt>self.field(header)</tt>:
+ # row.fields('Name') # => ["Foo"]
+ #
+ # When the specifier is a \Range of headers +range+,
+ # forms a new \Range +new_range+ from the indexes of
+ # <tt>range.start</tt> and <tt>range.end</tt>,
+ # and returns <tt>self.field(new_range)</tt>:
+ # source = "Name,NAME,name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.fields('Name'..'NAME') # => ["Foo", "Bar"]
+ #
+ # Returns all fields if no argument given:
+ # row.fields # => ["Foo", "Bar", "Baz"]
+ def fields(*headers_and_or_indices)
+ if headers_and_or_indices.empty? # return all fields--no arguments
+ @row.map(&:last)
+ else # or work like values_at()
+ all = []
+ headers_and_or_indices.each do |h_or_i|
+ if h_or_i.is_a? Range
+ index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin :
+ index(h_or_i.begin)
+ index_end = h_or_i.end.is_a?(Integer) ? h_or_i.end :
+ index(h_or_i.end)
+ new_range = h_or_i.exclude_end? ? (index_begin...index_end) :
+ (index_begin..index_end)
+ all.concat(fields.values_at(new_range))
+ else
+ all << field(*Array(h_or_i))
+ end
+ end
+ return all
+ end
+ end
+ alias_method :values_at, :fields
+
+ # :call-seq:
+ # index(header) -> index
+ # index(header, offset) -> index
+ #
+ # Returns the index for the given header, if it exists;
+ # otherwise returns +nil+.
+ #
+ # With the single argument +header+, returns the index
+ # of the first-found field with the given +header+:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.index('Name') # => 0
+ # row.index('NAME') # => nil
+ #
+ # With arguments +header+ and +offset+,
+ # returns the index of the first-found field with given +header+,
+ # but ignoring the first +offset+ fields:
+ # row.index('Name', 1) # => 1
+ # row.index('Name', 3) # => nil
+ def index(header, minimum_index = 0)
+ # find the pair
+ index = headers[minimum_index..-1].index(header)
+ # return the index at the right offset, if we found one
+ index.nil? ? nil : index + minimum_index
+ end
+
+ # :call-seq:
+ # row.field?(value) -> true or false
+ #
+ # Returns +true+ if +value+ is a field in this row, +false+ otherwise:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.field?('Bar') # => true
+ # row.field?('BAR') # => false
+ def field?(data)
+ fields.include? data
+ end
+
+ include Enumerable
+
+ # :call-seq:
+ # row.each {|header, value| ... } -> self
+ #
+ # Calls the block with each header-value pair; returns +self+:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.each {|header, value| p [header, value] }
+ # Output:
+ # ["Name", "Foo"]
+ # ["Name", "Bar"]
+ # ["Name", "Baz"]
+ #
+ # If no block is given, returns a new Enumerator:
+ # row.each # => #<Enumerator: #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz">:each>
+ def each(&block)
+ return enum_for(__method__) { size } unless block_given?
+
+ @row.each(&block)
+
+ self # for chaining
+ end
+
+ alias_method :each_pair, :each
+
+ # :call-seq:
+ # row == other -> true or false
+ #
+ # Returns +true+ if +other+ is a \CSV::Row that has the same
+ # fields (headers and values) in the same order as +self+;
+ # otherwise returns +false+:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # other_row = table[0]
+ # row == other_row # => true
+ # other_row = table[1]
+ # row == other_row # => false
+ def ==(other)
+ return @row == other.row if other.is_a? CSV::Row
+ @row == other
+ end
+
+ # :call-seq:
+ # row.to_h -> hash
+ #
+ # Returns the new \Hash formed by adding each header-value pair in +self+
+ # as a key-value pair in the \Hash.
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.to_h # => {"Name"=>"foo", "Value"=>"0"}
+ #
+ # Header order is preserved, but repeated headers are ignored:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.to_h # => {"Name"=>"Foo"}
+ def to_h
+ hash = {}
+ each do |key, _value|
+ hash[key] = self[key] unless hash.key?(key)
+ end
+ hash
+ end
+ alias_method :to_hash, :to_h
+
+ # :call-seq:
+ # row.deconstruct_keys(keys) -> hash
+ #
+ # Returns the new \Hash suitable for pattern matching containing only the
+ # keys specified as an argument.
+ def deconstruct_keys(keys)
+ if keys.nil?
+ to_h
+ else
+ keys.to_h { |key| [key, self[key]] }
+ end
+ end
+
+ alias_method :to_ary, :to_a
+
+ # :call-seq:
+ # row.deconstruct -> array
+ #
+ # Returns the new \Array suitable for pattern matching containing the values
+ # of the row.
+ def deconstruct
+ fields
+ end
+
+ # :call-seq:
+ # row.to_csv -> csv_string
+ #
+ # Returns the row as a \CSV String. Headers are not included:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.to_csv # => "foo,0\n"
+ def to_csv(**options)
+ fields.to_csv(**options)
+ end
+ alias_method :to_s, :to_csv
+
+ # :call-seq:
+ # row.dig(index_or_header, *identifiers) -> object
+ #
+ # Finds and returns the object in the nested object that is specified
+ # by +index_or_header+ and +identifiers+.
+ #
+ # The nested objects may be instances of various classes.
+ # See {Dig Methods}[https://docs.ruby-lang.org/en/master/dig_methods_rdoc.html].
+ #
+ # Examples:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.dig(1) # => "0"
+ # row.dig('Value') # => "0"
+ # row.dig(5) # => nil
+ def dig(index_or_header, *indexes)
+ value = field(index_or_header)
+ if value.nil?
+ nil
+ elsif indexes.empty?
+ value
+ else
+ unless value.respond_to?(:dig)
+ raise TypeError, "#{value.class} does not have \#dig method"
+ end
+ value.dig(*indexes)
+ end
+ end
+
+ # :call-seq:
+ # row.inspect -> string
+ #
+ # Returns an ASCII-compatible \String showing:
+ # - Class \CSV::Row.
+ # - Header-value pairs.
+ # Example:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.inspect # => "#<CSV::Row \"Name\":\"foo\" \"Value\":\"0\">"
+ def inspect
+ str = ["#<", self.class.to_s]
+ each do |header, field|
+ str << " " << (header.is_a?(Symbol) ? header.to_s : header.inspect) <<
+ ":" << field.inspect
+ end
+ str << ">"
+ begin
+ str.join('')
+ rescue # any encoding error
+ str.map do |s|
+ e = Encoding::Converter.asciicompat_encoding(s.encoding)
+ e ? s.encode(e) : s.force_encoding("ASCII-8BIT")
+ end.join('')
+ end
+ end
+ end
+end
diff --git a/lib/csv/table.rb b/lib/csv/table.rb
new file mode 100644
index 0000000000..90af50869d
--- /dev/null
+++ b/lib/csv/table.rb
@@ -0,0 +1,1056 @@
+# frozen_string_literal: true
+
+require "forwardable"
+
+class CSV
+ # = \CSV::Table
+ # A \CSV::Table instance represents \CSV data.
+ # (see {class CSV}[../CSV.html]).
+ #
+ # The instance may have:
+ # - Rows: each is a CSV::Row object.
+ # - Headers: names for the columns.
+ #
+ # === Instance Methods
+ #
+ # \CSV::Table has three groups of instance methods:
+ # - Its own internally defined instance methods.
+ # - Methods included by module Enumerable.
+ # - Methods delegated to class Array:
+ # * Array#empty?
+ # * Array#length
+ # * Array#size
+ #
+ # == Creating a \CSV::Table Instance
+ #
+ # Commonly, a new \CSV::Table instance is created by parsing \CSV source
+ # using headers:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.class # => CSV::Table
+ #
+ # You can also create an instance directly. See ::new.
+ #
+ # == Headers
+ #
+ # If a table has headers, the headers serve as labels for the columns of data.
+ # Each header serves as the label for its column.
+ #
+ # The headers for a \CSV::Table object are stored as an \Array of Strings.
+ #
+ # Commonly, headers are defined in the first row of \CSV source:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.headers # => ["Name", "Value"]
+ #
+ # If no headers are defined, the \Array is empty:
+ # table = CSV::Table.new([])
+ # table.headers # => []
+ #
+ # == Access Modes
+ #
+ # \CSV::Table provides three modes for accessing table data:
+ # - \Row mode.
+ # - Column mode.
+ # - Mixed mode (the default for a new table).
+ #
+ # The access mode for a \CSV::Table instance affects the behavior
+ # of some of its instance methods:
+ # - #[]
+ # - #[]=
+ # - #delete
+ # - #delete_if
+ # - #each
+ # - #values_at
+ #
+ # === \Row Mode
+ #
+ # Set a table to row mode with method #by_row!:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_row! # => #<CSV::Table mode:row row_count:4>
+ #
+ # Specify a single row by an \Integer index:
+ # # Get a row.
+ # table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
+ # # Set a row, then get it.
+ # table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3])
+ # table[1] # => #<CSV::Row "Name":"bam" "Value":3>
+ #
+ # Specify a sequence of rows by a \Range:
+ # # Get rows.
+ # table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">]
+ # # Set rows, then get them.
+ # table[1..2] = [
+ # CSV::Row.new(['Name', 'Value'], ['bat', 4]),
+ # CSV::Row.new(['Name', 'Value'], ['bad', 5]),
+ # ]
+ # table[1..2] # => [["Name", #<CSV::Row "Name":"bat" "Value":4>], ["Value", #<CSV::Row "Name":"bad" "Value":5>]]
+ #
+ # === Column Mode
+ #
+ # Set a table to column mode with method #by_col!:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ #
+ # Specify a column by an \Integer index:
+ # # Get a column.
+ # table[0]
+ # # Set a column, then get it.
+ # table[0] = ['FOO', 'BAR', 'BAZ']
+ # table[0] # => ["FOO", "BAR", "BAZ"]
+ #
+ # Specify a column by its \String header:
+ # # Get a column.
+ # table['Name'] # => ["FOO", "BAR", "BAZ"]
+ # # Set a column, then get it.
+ # table['Name'] = ['Foo', 'Bar', 'Baz']
+ # table['Name'] # => ["Foo", "Bar", "Baz"]
+ #
+ # === Mixed Mode
+ #
+ # In mixed mode, you can refer to either rows or columns:
+ # - An \Integer index refers to a row.
+ # - A \Range index refers to multiple rows.
+ # - A \String index refers to a column.
+ #
+ # Set a table to mixed mode with method #by_col_or_row!:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4>
+ #
+ # Specify a single row by an \Integer index:
+ # # Get a row.
+ # table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
+ # # Set a row, then get it.
+ # table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3])
+ # table[1] # => #<CSV::Row "Name":"bam" "Value":3>
+ #
+ # Specify a sequence of rows by a \Range:
+ # # Get rows.
+ # table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">]
+ # # Set rows, then get them.
+ # table[1] = CSV::Row.new(['Name', 'Value'], ['bat', 4])
+ # table[2] = CSV::Row.new(['Name', 'Value'], ['bad', 5])
+ # table[1..2] # => [["Name", #<CSV::Row "Name":"bat" "Value":4>], ["Value", #<CSV::Row "Name":"bad" "Value":5>]]
+ #
+ # Specify a column by its \String header:
+ # # Get a column.
+ # table['Name'] # => ["foo", "bat", "bad"]
+ # # Set a column, then get it.
+ # table['Name'] = ['Foo', 'Bar', 'Baz']
+ # table['Name'] # => ["Foo", "Bar", "Baz"]
+ class Table
+ # :call-seq:
+ # CSV::Table.new(array_of_rows, headers: nil) -> csv_table
+ #
+ # Returns a new \CSV::Table object.
+ #
+ # - Argument +array_of_rows+ must be an \Array of CSV::Row objects.
+ # - Argument +headers+, if given, may be an \Array of Strings.
+ #
+ # ---
+ #
+ # Create an empty \CSV::Table object:
+ # table = CSV::Table.new([])
+ # table # => #<CSV::Table mode:col_or_row row_count:1>
+ #
+ # Create a non-empty \CSV::Table object:
+ # rows = [
+ # CSV::Row.new([], []),
+ # CSV::Row.new([], []),
+ # CSV::Row.new([], []),
+ # ]
+ # table = CSV::Table.new(rows)
+ # table # => #<CSV::Table mode:col_or_row row_count:4>
+ #
+ # ---
+ #
+ # If argument +headers+ is an \Array of Strings,
+ # those Strings become the table's headers:
+ # table = CSV::Table.new([], headers: ['Name', 'Age'])
+ # table.headers # => ["Name", "Age"]
+ #
+ # If argument +headers+ is not given and the table has rows,
+ # the headers are taken from the first row:
+ # rows = [
+ # CSV::Row.new(['Foo', 'Bar'], []),
+ # CSV::Row.new(['foo', 'bar'], []),
+ # CSV::Row.new(['FOO', 'BAR'], []),
+ # ]
+ # table = CSV::Table.new(rows)
+ # table.headers # => ["Foo", "Bar"]
+ #
+ # If argument +headers+ is not given and the table is empty (has no rows),
+ # the headers are also empty:
+ # table = CSV::Table.new([])
+ # table.headers # => []
+ #
+ # ---
+ #
+ # Raises an exception if argument +array_of_rows+ is not an \Array object:
+ # # Raises NoMethodError (undefined method `first' for :foo:Symbol):
+ # CSV::Table.new(:foo)
+ #
+ # Raises an exception if an element of +array_of_rows+ is not a \CSV::Row object:
+ # # Raises NoMethodError (undefined method `headers' for :foo:Symbol):
+ # CSV::Table.new([:foo])
+ def initialize(array_of_rows, headers: nil)
+ @table = array_of_rows
+ @headers = headers
+ unless @headers
+ if @table.empty?
+ @headers = []
+ else
+ @headers = @table.first.headers
+ end
+ end
+
+ @mode = :col_or_row
+ end
+
+ # The current access mode for indexing and iteration.
+ attr_reader :mode
+
+ # Internal data format used to compare equality.
+ attr_reader :table
+ protected :table
+
+ ### Array Delegation ###
+
+ extend Forwardable
+ def_delegators :@table, :empty?, :length, :size
+
+ # :call-seq:
+ # table.by_col -> table_dup
+ #
+ # Returns a duplicate of +self+, in column mode
+ # (see {Column Mode}[#class-CSV::Table-label-Column+Mode]):
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.mode # => :col_or_row
+ # dup_table = table.by_col
+ # dup_table.mode # => :col
+ # dup_table.equal?(table) # => false # It's a dup
+ #
+ # This may be used to chain method calls without changing the mode
+ # (but also will affect performance and memory usage):
+ # dup_table.by_col['Name']
+ #
+ # Also note that changes to the duplicate table will not affect the original.
+ def by_col
+ self.class.new(@table.dup).by_col!
+ end
+
+ # :call-seq:
+ # table.by_col! -> self
+ #
+ # Sets the mode for +self+ to column mode
+ # (see {Column Mode}[#class-CSV::Table-label-Column+Mode]); returns +self+:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.mode # => :col_or_row
+ # table1 = table.by_col!
+ # table.mode # => :col
+ # table1.equal?(table) # => true # Returned self
+ def by_col!
+ @mode = :col
+
+ self
+ end
+
+ # :call-seq:
+ # table.by_col_or_row -> table_dup
+ #
+ # Returns a duplicate of +self+, in mixed mode
+ # (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]):
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true).by_col!
+ # table.mode # => :col
+ # dup_table = table.by_col_or_row
+ # dup_table.mode # => :col_or_row
+ # dup_table.equal?(table) # => false # It's a dup
+ #
+ # This may be used to chain method calls without changing the mode
+ # (but also will affect performance and memory usage):
+ # dup_table.by_col_or_row['Name']
+ #
+ # Also note that changes to the duplicate table will not affect the original.
+ def by_col_or_row
+ self.class.new(@table.dup).by_col_or_row!
+ end
+
+ # :call-seq:
+ # table.by_col_or_row! -> self
+ #
+ # Sets the mode for +self+ to mixed mode
+ # (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]); returns +self+:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true).by_col!
+ # table.mode # => :col
+ # table1 = table.by_col_or_row!
+ # table.mode # => :col_or_row
+ # table1.equal?(table) # => true # Returned self
+ def by_col_or_row!
+ @mode = :col_or_row
+
+ self
+ end
+
+ # :call-seq:
+ # table.by_row -> table_dup
+ #
+ # Returns a duplicate of +self+, in row mode
+ # (see {Row Mode}[#class-CSV::Table-label-Row+Mode]):
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.mode # => :col_or_row
+ # dup_table = table.by_row
+ # dup_table.mode # => :row
+ # dup_table.equal?(table) # => false # It's a dup
+ #
+ # This may be used to chain method calls without changing the mode
+ # (but also will affect performance and memory usage):
+ # dup_table.by_row[1]
+ #
+ # Also note that changes to the duplicate table will not affect the original.
+ def by_row
+ self.class.new(@table.dup).by_row!
+ end
+
+ # :call-seq:
+ # table.by_row! -> self
+ #
+ # Sets the mode for +self+ to row mode
+ # (see {Row Mode}[#class-CSV::Table-label-Row+Mode]); returns +self+:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.mode # => :col_or_row
+ # table1 = table.by_row!
+ # table.mode # => :row
+ # table1.equal?(table) # => true # Returned self
+ def by_row!
+ @mode = :row
+
+ self
+ end
+
+ # :call-seq:
+ # table.headers -> array_of_headers
+ #
+ # Returns a new \Array containing the \String headers for the table.
+ #
+ # If the table is not empty, returns the headers from the first row:
+ # rows = [
+ # CSV::Row.new(['Foo', 'Bar'], []),
+ # CSV::Row.new(['FOO', 'BAR'], []),
+ # CSV::Row.new(['foo', 'bar'], []),
+ # ]
+ # table = CSV::Table.new(rows)
+ # table.headers # => ["Foo", "Bar"]
+ # table.delete(0)
+ # table.headers # => ["FOO", "BAR"]
+ # table.delete(0)
+ # table.headers # => ["foo", "bar"]
+ #
+ # If the table is empty, returns a copy of the headers in the table itself:
+ # table.delete(0)
+ # table.headers # => ["Foo", "Bar"]
+ def headers
+ if @table.empty?
+ @headers.dup
+ else
+ @table.first.headers
+ end
+ end
+
+ # :call-seq:
+ # table[n] -> row or column_data
+ # table[range] -> array_of_rows or array_of_column_data
+ # table[header] -> array_of_column_data
+ #
+ # Returns data from the table; does not modify the table.
+ #
+ # ---
+ #
+ # Fetch a \Row by Its \Integer Index::
+ # - Form: <tt>table[n]</tt>, +n+ an integer.
+ # - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>.
+ # - Return value: _nth_ row of the table, if that row exists;
+ # otherwise +nil+.
+ #
+ # Returns the _nth_ row of the table if that row exists:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_row! # => #<CSV::Table mode:row row_count:4>
+ # table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
+ # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4>
+ # table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
+ #
+ # Counts backward from the last row if +n+ is negative:
+ # table[-1] # => #<CSV::Row "Name":"baz" "Value":"2">
+ #
+ # Returns +nil+ if +n+ is too large or too small:
+ # table[4] # => nil
+ # table[-4] # => nil
+ #
+ # Raises an exception if the access mode is <tt>:row</tt>
+ # and +n+ is not an \Integer:
+ # table.by_row! # => #<CSV::Table mode:row row_count:4>
+ # # Raises TypeError (no implicit conversion of String into Integer):
+ # table['Name']
+ #
+ # ---
+ #
+ # Fetch a Column by Its \Integer Index::
+ # - Form: <tt>table[n]</tt>, +n+ an \Integer.
+ # - Access mode: <tt>:col</tt>.
+ # - Return value: _nth_ column of the table, if that column exists;
+ # otherwise an \Array of +nil+ fields of length <tt>self.size</tt>.
+ #
+ # Returns the _nth_ column of the table if that column exists:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ # table[1] # => ["0", "1", "2"]
+ #
+ # Counts backward from the last column if +n+ is negative:
+ # table[-2] # => ["foo", "bar", "baz"]
+ #
+ # Returns an \Array of +nil+ fields if +n+ is too large or too small:
+ # table[4] # => [nil, nil, nil]
+ # table[-4] # => [nil, nil, nil]
+ #
+ # ---
+ #
+ # Fetch Rows by \Range::
+ # - Form: <tt>table[range]</tt>, +range+ a \Range object.
+ # - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>.
+ # - Return value: rows from the table, beginning at row <tt>range.start</tt>,
+ # if those rows exist.
+ #
+ # Returns rows from the table, beginning at row <tt>range.first</tt>,
+ # if those rows exist:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_row! # => #<CSV::Table mode:row row_count:4>
+ # rows = table[1..2] # => #<CSV::Row "Name":"bar" "Value":"1">
+ # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">]
+ # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4>
+ # rows = table[1..2] # => #<CSV::Row "Name":"bar" "Value":"1">
+ # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">]
+ #
+ # If there are too few rows, returns all from <tt>range.start</tt> to the end:
+ # rows = table[1..50] # => #<CSV::Row "Name":"bar" "Value":"1">
+ # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">]
+ #
+ # Special case: if <tt>range.start == table.size</tt>, returns an empty \Array:
+ # table[table.size..50] # => []
+ #
+ # If <tt>range.end</tt> is negative, calculates the ending index from the end:
+ # rows = table[0..-1]
+ # rows # => [#<CSV::Row "Name":"foo" "Value":"0">, #<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">]
+ #
+ # If <tt>range.start</tt> is negative, calculates the starting index from the end:
+ # rows = table[-1..2]
+ # rows # => [#<CSV::Row "Name":"baz" "Value":"2">]
+ #
+ # If <tt>range.start</tt> is larger than <tt>table.size</tt>, returns +nil+:
+ # table[4..4] # => nil
+ #
+ # ---
+ #
+ # Fetch Columns by \Range::
+ # - Form: <tt>table[range]</tt>, +range+ a \Range object.
+ # - Access mode: <tt>:col</tt>.
+ # - Return value: column data from the table, beginning at column <tt>range.start</tt>,
+ # if those columns exist.
+ #
+ # Returns column values from the table, if the column exists;
+ # the values are arranged by row:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_col!
+ # table[0..1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
+ #
+ # Special case: if <tt>range.start == headers.size</tt>,
+ # returns an \Array (size: <tt>table.size</tt>) of empty \Arrays:
+ # table[table.headers.size..50] # => [[], [], []]
+ #
+ # If <tt>range.end</tt> is negative, calculates the ending index from the end:
+ # table[0..-1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
+ #
+ # If <tt>range.start</tt> is negative, calculates the starting index from the end:
+ # table[-2..2] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
+ #
+ # If <tt>range.start</tt> is larger than <tt>table.size</tt>,
+ # returns an \Array of +nil+ values:
+ # table[4..4] # => [nil, nil, nil]
+ #
+ # ---
+ #
+ # Fetch a Column by Its \String Header::
+ # - Form: <tt>table[header]</tt>, +header+ a \String header.
+ # - Access mode: <tt>:col</tt> or <tt>:col_or_row</tt>
+ # - Return value: column data from the table, if that +header+ exists.
+ #
+ # Returns column values from the table, if the column exists:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ # table['Name'] # => ["foo", "bar", "baz"]
+ # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4>
+ # col = table['Name']
+ # col # => ["foo", "bar", "baz"]
+ #
+ # Modifying the returned column values does not modify the table:
+ # col[0] = 'bat'
+ # col # => ["bat", "bar", "baz"]
+ # table['Name'] # => ["foo", "bar", "baz"]
+ #
+ # Returns an \Array of +nil+ values if there is no such column:
+ # table['Nosuch'] # => [nil, nil, nil]
+ def [](index_or_header)
+ if @mode == :row or # by index
+ (@mode == :col_or_row and (index_or_header.is_a?(Integer) or index_or_header.is_a?(Range)))
+ @table[index_or_header]
+ else # by header
+ @table.map { |row| row[index_or_header] }
+ end
+ end
+
+ # :call-seq:
+ # table[n] = row -> row
+ # table[n] = field_or_array_of_fields -> field_or_array_of_fields
+ # table[header] = field_or_array_of_fields -> field_or_array_of_fields
+ #
+ # Puts data onto the table.
+ #
+ # ---
+ #
+ # Set a \Row by Its \Integer Index::
+ # - Form: <tt>table[n] = row</tt>, +n+ an \Integer,
+ # +row+ a \CSV::Row instance or an \Array of fields.
+ # - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>.
+ # - Return value: +row+.
+ #
+ # If the row exists, it is replaced:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # new_row = CSV::Row.new(['Name', 'Value'], ['bat', 3])
+ # table.by_row! # => #<CSV::Table mode:row row_count:4>
+ # return_value = table[0] = new_row
+ # return_value.equal?(new_row) # => true # Returned the row
+ # table[0].to_h # => {"Name"=>"bat", "Value"=>3}
+ #
+ # With access mode <tt>:col_or_row</tt>:
+ # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4>
+ # table[0] = CSV::Row.new(['Name', 'Value'], ['bam', 4])
+ # table[0].to_h # => {"Name"=>"bam", "Value"=>4}
+ #
+ # With an \Array instead of a \CSV::Row, inherits headers from the table:
+ # array = ['bad', 5]
+ # return_value = table[0] = array
+ # return_value.equal?(array) # => true # Returned the array
+ # table[0].to_h # => {"Name"=>"bad", "Value"=>5}
+ #
+ # If the row does not exist, extends the table by adding rows:
+ # assigns rows with +nil+ as needed:
+ # table.size # => 3
+ # table[5] = ['bag', 6]
+ # table.size # => 6
+ # table[3] # => nil
+ # table[4]# => nil
+ # table[5].to_h # => {"Name"=>"bag", "Value"=>6}
+ #
+ # Note that the +nil+ rows are actually +nil+, not a row of +nil+ fields.
+ #
+ # ---
+ #
+ # Set a Column by Its \Integer Index::
+ # - Form: <tt>table[n] = array_of_fields</tt>, +n+ an \Integer,
+ # +array_of_fields+ an \Array of \String fields.
+ # - Access mode: <tt>:col</tt>.
+ # - Return value: +array_of_fields+.
+ #
+ # If the column exists, it is replaced:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # new_col = [3, 4, 5]
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ # return_value = table[1] = new_col
+ # return_value.equal?(new_col) # => true # Returned the column
+ # table[1] # => [3, 4, 5]
+ # # The rows, as revised:
+ # table.by_row! # => #<CSV::Table mode:row row_count:4>
+ # table[0].to_h # => {"Name"=>"foo", "Value"=>3}
+ # table[1].to_h # => {"Name"=>"bar", "Value"=>4}
+ # table[2].to_h # => {"Name"=>"baz", "Value"=>5}
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ #
+ # If there are too few values, fills with +nil+ values:
+ # table[1] = [0]
+ # table[1] # => [0, nil, nil]
+ #
+ # If there are too many values, ignores the extra values:
+ # table[1] = [0, 1, 2, 3, 4]
+ # table[1] # => [0, 1, 2]
+ #
+ # If a single value is given, replaces all fields in the column with that value:
+ # table[1] = 'bat'
+ # table[1] # => ["bat", "bat", "bat"]
+ #
+ # ---
+ #
+ # Set a Column by Its \String Header::
+ # - Form: <tt>table[header] = field_or_array_of_fields</tt>,
+ # +header+ a \String header, +field_or_array_of_fields+ a field value
+ # or an \Array of \String fields.
+ # - Access mode: <tt>:col</tt> or <tt>:col_or_row</tt>.
+ # - Return value: +field_or_array_of_fields+.
+ #
+ # If the column exists, it is replaced:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # new_col = [3, 4, 5]
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ # return_value = table['Value'] = new_col
+ # return_value.equal?(new_col) # => true # Returned the column
+ # table['Value'] # => [3, 4, 5]
+ # # The rows, as revised:
+ # table.by_row! # => #<CSV::Table mode:row row_count:4>
+ # table[0].to_h # => {"Name"=>"foo", "Value"=>3}
+ # table[1].to_h # => {"Name"=>"bar", "Value"=>4}
+ # table[2].to_h # => {"Name"=>"baz", "Value"=>5}
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ #
+ # If there are too few values, fills with +nil+ values:
+ # table['Value'] = [0]
+ # table['Value'] # => [0, nil, nil]
+ #
+ # If there are too many values, ignores the extra values:
+ # table['Value'] = [0, 1, 2, 3, 4]
+ # table['Value'] # => [0, 1, 2]
+ #
+ # If the column does not exist, extends the table by adding columns:
+ # table['Note'] = ['x', 'y', 'z']
+ # table['Note'] # => ["x", "y", "z"]
+ # # The rows, as revised:
+ # table.by_row!
+ # table[0].to_h # => {"Name"=>"foo", "Value"=>0, "Note"=>"x"}
+ # table[1].to_h # => {"Name"=>"bar", "Value"=>1, "Note"=>"y"}
+ # table[2].to_h # => {"Name"=>"baz", "Value"=>2, "Note"=>"z"}
+ # table.by_col!
+ #
+ # If a single value is given, replaces all fields in the column with that value:
+ # table['Value'] = 'bat'
+ # table['Value'] # => ["bat", "bat", "bat"]
+ def []=(index_or_header, value)
+ if @mode == :row or # by index
+ (@mode == :col_or_row and index_or_header.is_a? Integer)
+ if value.is_a? Array
+ @table[index_or_header] = Row.new(headers, value)
+ else
+ @table[index_or_header] = value
+ end
+ else # set column
+ unless index_or_header.is_a? Integer
+ index = @headers.index(index_or_header) || @headers.size
+ @headers[index] = index_or_header
+ end
+ if value.is_a? Array # multiple values
+ @table.each_with_index do |row, i|
+ if row.header_row?
+ row[index_or_header] = index_or_header
+ else
+ row[index_or_header] = value[i]
+ end
+ end
+ else # repeated value
+ @table.each do |row|
+ if row.header_row?
+ row[index_or_header] = index_or_header
+ else
+ row[index_or_header] = value
+ end
+ end
+ end
+ end
+ end
+
+ # :call-seq:
+ # table.values_at(*indexes) -> array_of_rows
+ # table.values_at(*headers) -> array_of_columns_data
+ #
+ # If the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>,
+ # and each argument is either an \Integer or a \Range,
+ # returns rows.
+ # Otherwise, returns columns data.
+ #
+ # In either case, the returned values are in the order
+ # specified by the arguments. Arguments may be repeated.
+ #
+ # ---
+ #
+ # Returns rows as an \Array of \CSV::Row objects.
+ #
+ # No argument:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.values_at # => []
+ #
+ # One index:
+ # values = table.values_at(0)
+ # values # => [#<CSV::Row "Name":"foo" "Value":"0">]
+ #
+ # Two indexes:
+ # values = table.values_at(2, 0)
+ # values # => [#<CSV::Row "Name":"baz" "Value":"2">, #<CSV::Row "Name":"foo" "Value":"0">]
+ #
+ # One \Range:
+ # values = table.values_at(1..2)
+ # values # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">]
+ #
+ # \Ranges and indexes:
+ # values = table.values_at(0..1, 1..2, 0, 2)
+ # pp values
+ # Output:
+ # [#<CSV::Row "Name":"foo" "Value":"0">,
+ # #<CSV::Row "Name":"bar" "Value":"1">,
+ # #<CSV::Row "Name":"bar" "Value":"1">,
+ # #<CSV::Row "Name":"baz" "Value":"2">,
+ # #<CSV::Row "Name":"foo" "Value":"0">,
+ # #<CSV::Row "Name":"baz" "Value":"2">]
+ #
+ # ---
+ #
+ # Returns columns data as row Arrays,
+ # each consisting of the specified columns data for that row:
+ # values = table.values_at('Name')
+ # values # => [["foo"], ["bar"], ["baz"]]
+ # values = table.values_at('Value', 'Name')
+ # values # => [["0", "foo"], ["1", "bar"], ["2", "baz"]]
+ def values_at(*indices_or_headers)
+ if @mode == :row or # by indices
+ ( @mode == :col_or_row and indices_or_headers.all? do |index|
+ index.is_a?(Integer) or
+ ( index.is_a?(Range) and
+ index.first.is_a?(Integer) and
+ index.last.is_a?(Integer) )
+ end )
+ @table.values_at(*indices_or_headers)
+ else # by headers
+ @table.map { |row| row.values_at(*indices_or_headers) }
+ end
+ end
+
+ # :call-seq:
+ # table << row_or_array -> self
+ #
+ # If +row_or_array+ is a \CSV::Row object,
+ # it is appended to the table:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table << CSV::Row.new(table.headers, ['bat', 3])
+ # table[3] # => #<CSV::Row "Name":"bat" "Value":3>
+ #
+ # If +row_or_array+ is an \Array, it is used to create a new
+ # \CSV::Row object which is then appended to the table:
+ # table << ['bam', 4]
+ # table[4] # => #<CSV::Row "Name":"bam" "Value":4>
+ def <<(row_or_array)
+ if row_or_array.is_a? Array # append Array
+ @table << Row.new(headers, row_or_array)
+ else # append Row
+ @table << row_or_array
+ end
+
+ self # for chaining
+ end
+
+ #
+ # :call-seq:
+ # table.push(*rows_or_arrays) -> self
+ #
+ # A shortcut for appending multiple rows. Equivalent to:
+ # rows.each {|row| self << row }
+ #
+ # Each argument may be either a \CSV::Row object or an \Array:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # rows = [
+ # CSV::Row.new(table.headers, ['bat', 3]),
+ # ['bam', 4]
+ # ]
+ # table.push(*rows)
+ # table[3..4] # => [#<CSV::Row "Name":"bat" "Value":3>, #<CSV::Row "Name":"bam" "Value":4>]
+ def push(*rows)
+ rows.each { |row| self << row }
+
+ self # for chaining
+ end
+
+ # :call-seq:
+ # table.delete(*indexes) -> deleted_values
+ # table.delete(*headers) -> deleted_values
+ #
+ # If the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>,
+ # and each argument is either an \Integer or a \Range,
+ # returns deleted rows.
+ # Otherwise, returns deleted columns data.
+ #
+ # In either case, the returned values are in the order
+ # specified by the arguments. Arguments may be repeated.
+ #
+ # ---
+ #
+ # Returns rows as an \Array of \CSV::Row objects.
+ #
+ # One index:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # deleted_values = table.delete(0)
+ # deleted_values # => #<CSV::Row "Name":"foo" "Value":"0">
+ #
+ # Two indexes:
+ # table = CSV.parse(source, headers: true)
+ # deleted_values = table.delete(2, 0)
+ # deleted_values # => [#<CSV::Row "Name":"baz" "Value":"2">, #<CSV::Row "Name":"foo" "Value":"0">]
+ #
+ # ---
+ #
+ # Returns columns data as column Arrays.
+ #
+ # One header:
+ # table = CSV.parse(source, headers: true)
+ # deleted_values = table.delete('Name')
+ # deleted_values # => ["foo", "bar", "baz"]
+ #
+ # Two headers:
+ # table = CSV.parse(source, headers: true)
+ # deleted_values = table.delete('Value', 'Name')
+ # deleted_values # => [["0", "1", "2"], ["foo", "bar", "baz"]]
+ def delete(*indexes_or_headers)
+ if indexes_or_headers.empty?
+ raise ArgumentError, "wrong number of arguments (given 0, expected 1+)"
+ end
+ deleted_values = indexes_or_headers.map do |index_or_header|
+ if @mode == :row or # by index
+ (@mode == :col_or_row and index_or_header.is_a? Integer)
+ @table.delete_at(index_or_header)
+ else # by header
+ if index_or_header.is_a? Integer
+ @headers.delete_at(index_or_header)
+ else
+ @headers.delete(index_or_header)
+ end
+ @table.map { |row| row.delete(index_or_header).last }
+ end
+ end
+ if indexes_or_headers.size == 1
+ deleted_values[0]
+ else
+ deleted_values
+ end
+ end
+
+ # :call-seq:
+ # table.delete_if {|row_or_column| ... } -> self
+ #
+ # Removes rows or columns for which the block returns a truthy value;
+ # returns +self+.
+ #
+ # Removes rows when the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>;
+ # calls the block with each \CSV::Row object:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_row! # => #<CSV::Table mode:row row_count:4>
+ # table.size # => 3
+ # table.delete_if {|row| row['Name'].start_with?('b') }
+ # table.size # => 1
+ #
+ # Removes columns when the access mode is <tt>:col</tt>;
+ # calls the block with each column as a 2-element array
+ # containing the header and an \Array of column fields:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ # table.headers.size # => 2
+ # table.delete_if {|column_data| column_data[1].include?('2') }
+ # table.headers.size # => 1
+ #
+ # Returns a new \Enumerator if no block is given:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.delete_if # => #<Enumerator: #<CSV::Table mode:col_or_row row_count:4>:delete_if>
+ def delete_if(&block)
+ return enum_for(__method__) { @mode == :row or @mode == :col_or_row ? size : headers.size } unless block_given?
+
+ if @mode == :row or @mode == :col_or_row # by index
+ @table.delete_if(&block)
+ else # by header
+ deleted = []
+ headers.each do |header|
+ deleted << delete(header) if yield([header, self[header]])
+ end
+ end
+
+ self # for chaining
+ end
+
+ include Enumerable
+
+ # :call-seq:
+ # table.each {|row_or_column| ... } -> self
+ #
+ # Calls the block with each row or column; returns +self+.
+ #
+ # When the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>,
+ # calls the block with each \CSV::Row object:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_row! # => #<CSV::Table mode:row row_count:4>
+ # table.each {|row| p row }
+ # Output:
+ # #<CSV::Row "Name":"foo" "Value":"0">
+ # #<CSV::Row "Name":"bar" "Value":"1">
+ # #<CSV::Row "Name":"baz" "Value":"2">
+ #
+ # When the access mode is <tt>:col</tt>,
+ # calls the block with each column as a 2-element array
+ # containing the header and an \Array of column fields:
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ # table.each {|column_data| p column_data }
+ # Output:
+ # ["Name", ["foo", "bar", "baz"]]
+ # ["Value", ["0", "1", "2"]]
+ #
+ # Returns a new \Enumerator if no block is given:
+ # table.each # => #<Enumerator: #<CSV::Table mode:col row_count:4>:each>
+ def each(&block)
+ return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given?
+
+ if @mode == :col
+ headers.each.with_index do |header, i|
+ yield([header, @table.map {|row| row[header, i]}])
+ end
+ else
+ @table.each(&block)
+ end
+
+ self # for chaining
+ end
+
+ # :call-seq:
+ # table == other_table -> true or false
+ #
+ # Returns +true+ if each row of +self+ <tt>==</tt>
+ # the corresponding row of +other_table+, otherwise, +false+.
+ #
+ # The access mode does not affect the result.
+ #
+ # Equal tables:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # other_table = CSV.parse(source, headers: true)
+ # table == other_table # => true
+ #
+ # Different row count:
+ # other_table.delete(2)
+ # table == other_table # => false
+ #
+ # Different last row:
+ # other_table << ['bat', 3]
+ # table == other_table # => false
+ def ==(other)
+ return @table == other.table if other.is_a? CSV::Table
+ @table == other
+ end
+
+ # :call-seq:
+ # table.to_a -> array_of_arrays
+ #
+ # Returns the table as an \Array of \Arrays;
+ # the headers are in the first row:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.to_a # => [["Name", "Value"], ["foo", "0"], ["bar", "1"], ["baz", "2"]]
+ def to_a
+ array = [headers]
+ @table.each do |row|
+ array.push(row.fields) unless row.header_row?
+ end
+
+ array
+ end
+
+ # :call-seq:
+ # table.to_csv(**options) -> csv_string
+ #
+ # Returns the table as \CSV string.
+ # See {Options for Generating}[../CSV.html#class-CSV-label-Options+for+Generating].
+ #
+ # Defaults option +write_headers+ to +true+:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.to_csv # => "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ #
+ # Omits the headers if option +write_headers+ is given as +false+
+ # (see {Option +write_headers+}[../CSV.html#class-CSV-label-Option+write_headers]):
+ # table.to_csv(write_headers: false) # => "foo,0\nbar,1\nbaz,2\n"
+ #
+ # Limit rows if option +limit+ is given like +2+:
+ # table.to_csv(limit: 2) # => "Name,Value\nfoo,0\nbar,1\n"
+ def to_csv(write_headers: true, limit: nil, **options)
+ array = write_headers ? [headers.to_csv(**options)] : []
+ limit ||= @table.size
+ limit = @table.size + 1 + limit if limit < 0
+ limit = 0 if limit < 0
+ @table.first(limit).each do |row|
+ array.push(row.fields.to_csv(**options)) unless row.header_row?
+ end
+
+ array.join("")
+ end
+ alias_method :to_s, :to_csv
+
+ #
+ # Extracts the nested value specified by the sequence of +index+ or +header+ objects by calling dig at each step,
+ # returning nil if any intermediate step is nil.
+ #
+ def dig(index_or_header, *index_or_headers)
+ value = self[index_or_header]
+ if value.nil?
+ nil
+ elsif index_or_headers.empty?
+ value
+ else
+ unless value.respond_to?(:dig)
+ raise TypeError, "#{value.class} does not have \#dig method"
+ end
+ value.dig(*index_or_headers)
+ end
+ end
+
+ # :call-seq:
+ # table.inspect -> string
+ #
+ # Returns a <tt>US-ASCII</tt>-encoded \String showing table:
+ # - Class: <tt>CSV::Table</tt>.
+ # - Access mode: <tt>:row</tt>, <tt>:col</tt>, or <tt>:col_or_row</tt>.
+ # - Size: Row count, including the header row.
+ #
+ # Example:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.inspect # => "#<CSV::Table mode:col_or_row row_count:4>\nName,Value\nfoo,0\nbar,1\nbaz,2\n"
+ #
+ def inspect
+ inspected = +"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
+ summary = to_csv(limit: 5)
+ inspected << "\n" << summary if summary.encoding.ascii_compatible?
+ inspected
+ end
+ end
+end
diff --git a/lib/csv/version.rb b/lib/csv/version.rb
new file mode 100644
index 0000000000..ca16064a89
--- /dev/null
+++ b/lib/csv/version.rb
@@ -0,0 +1,6 @@
+# frozen_string_literal: true
+
+class CSV
+ # The version of the installed library, as a dotted version \String.
+ # The gem specification reads this constant for <tt>spec.version</tt>.
+ VERSION = "3.2.5"
+end
diff --git a/lib/csv/writer.rb b/lib/csv/writer.rb
new file mode 100644
index 0000000000..030a295bc9
--- /dev/null
+++ b/lib/csv/writer.rb
@@ -0,0 +1,210 @@
+# frozen_string_literal: true
+
+require_relative "input_record_separator"
+require_relative "row"
+
+class CSV
+ # Note: Don't use this class directly. This is an internal class.
+ class Writer
+ #
+ # A CSV::Writer receives an output, prepares the header, format and output.
+ # It allows us to write new rows in the object and rewind it.
+ #
+ attr_reader :lineno
+ attr_reader :headers
+
+ def initialize(output, options)
+   @output = output
+   @options = options
+   @lineno = 0
+   # The fields converter stays unset while the header line is written, so
+   # the header row is emitted without field conversion.
+   @fields_converter = nil
+   prepare
+   self << @headers if @options[:write_headers] && @headers
+   @fields_converter = @options[:fields_converter]
+ end
+
+ #
+ # Adds a new row
+ #
+ def <<(row)
+   # Reduce Row and Hash inputs to a plain list of field values; a Hash is
+   # read in header order.
+   fields =
+     case row
+     when Row then row.fields
+     when Hash then @headers.collect {|header| row[header]}
+     else row
+     end
+
+   # With headers enabled but none known yet, the first row becomes the headers.
+   @headers ||= fields if @use_headers
+   @lineno += 1
+
+   if @fields_converter
+     # Nothing has been quoted at this point, so every flag is false.
+     quoted_fields = Array.new(fields.size, false)
+     fields = @fields_converter.convert(fields, nil, lineno, quoted_fields)
+   end
+
+   quoted = fields.each_with_index.map {|field, index| quote(field, index)}
+   line = quoted.join(@column_separator) + @row_separator
+   line = line.encode(@output_encoding) if @output_encoding
+   @output << line
+
+   self
+ end
+
+ #
+ # Winds back to the beginning
+ #
+ def rewind
+   @lineno = 0
+   # Headers given through the :headers option are kept; only headers that
+   # were not configured there are forgotten.
+   return unless @options[:headers].nil?
+
+   @headers = nil
+ end
+
+ private
+ # Derives the writer state from @options.
+ #
+ # The order of the steps matters: prepare_format and prepare_output read
+ # @encoding, and prepare_format may resolve field names against @headers
+ # (through prepare_force_quotes_fields), so the headers are prepared first.
+ def prepare
+ @encoding = @options[:encoding]
+
+ prepare_header
+ prepare_format
+ prepare_output
+ end
+
+ # Sets @headers and @use_headers from the :headers option, then runs the
+ # header fields converter over the headers and freezes the String ones.
+ def prepare_header
+   header_option = @options[:headers]
+   # An Array is used as is and a String is parsed as one CSV line; for any
+   # other value (including +true+) the headers are not known yet.
+   @headers =
+     case header_option
+     when Array
+       header_option
+     when String
+       CSV.parse_line(header_option,
+                      col_sep: @options[:column_separator],
+                      row_sep: @options[:row_separator],
+                      quote_char: @options[:quote_character])
+     end
+   # Array, String and +true+ enable header handling; anything else disables it.
+   @use_headers =
+     case header_option
+     when Array, String, true then true
+     else false
+     end
+   return unless @headers
+
+   converter = @options[:header_fields_converter]
+   @headers = converter.convert(@headers, nil, 0, [])
+   @headers.each {|header| header.freeze if header.is_a?(String)}
+ end
+
+ # Builds @force_quotes_fields, a Hash whose keys are the field indexes
+ # that must always be quoted.
+ #
+ # Each element of +force_quotes+ is an Integer index or a String/Symbol
+ # field name; names are resolved against @headers and unknown names are
+ # ignored. Raises ArgumentError when a name is given without headers, or
+ # when an element is neither an index nor a name.
+ def prepare_force_quotes_fields(force_quotes)
+   @force_quotes_fields = {}
+   force_quotes.each do |name_or_index|
+     index =
+       case name_or_index
+       when Integer
+         name_or_index
+       when String, Symbol
+         if @headers.nil?
+           raise ArgumentError,
+                 ":headers is required when you use field name in :force_quotes: " \
+                 "#{name_or_index.inspect}: #{force_quotes.inspect}"
+         end
+         @headers.index(name_or_index.to_s)
+       else
+         raise ArgumentError,
+               ":force_quotes element must be field index or field name: " \
+               "#{name_or_index.inspect}: #{force_quotes.inspect}"
+       end
+     @force_quotes_fields[index] = true unless index.nil?
+   end
+ end
+
+ # Prepares the separators, the quote character and the quoting rules
+ # from @options, transcoding the separators to @encoding.
+ def prepare_format
+   @column_separator = @options[:column_separator].to_s.encode(@encoding)
+
+   # :auto defers to InputRecordSeparator.value; any other value is used as given.
+   row_separator = @options[:row_separator]
+   raw_row_separator =
+     row_separator == :auto ? InputRecordSeparator.value : row_separator.to_s
+   @row_separator = raw_row_separator.encode(@encoding)
+
+   @quote_character = @options[:quote_character]
+
+   # :force_quotes is either a list of fields to always quote or a flag
+   # that applies to every field.
+   force_quotes = @options[:force_quotes]
+   case force_quotes
+   when Array
+     prepare_force_quotes_fields(force_quotes)
+     @force_quotes = false
+   else
+     @force_quotes_fields = nil
+     @force_quotes = force_quotes ? true : false
+   end
+
+   unless @force_quotes
+     # Character class matching anything that makes a field need quotes:
+     # CR, LF, the column separator or the quote character.
+     opening = "[\r\n".encode(@encoding)
+     escaped_separator = Regexp.escape(@column_separator)
+     escaped_quote = Regexp.escape(@quote_character.encode(@encoding))
+     closing = "]".encode(@encoding)
+     @quotable_pattern =
+       Regexp.new(opening + escaped_separator + escaped_quote + closing)
+   end
+
+   @quote_empty = @options.fetch(:quote_empty, true)
+ end
+
+ # Reconciles @encoding with the encoding of a StringIO output.
+ #
+ # @output_encoding is set only when the encodings differ and the
+ # :force_encoding option is given; each line is then transcoded to it on
+ # write. Without that option, a compatible encoding (if any) is applied to
+ # the output and the write position is moved to its end.
+ def prepare_output
+   @output_encoding = nil
+   return unless @output.is_a?(StringIO)
+
+   target_encoding = @output.internal_encoding || @output.external_encoding
+   return if @encoding == target_encoding
+   return @output_encoding = target_encoding if @options[:force_encoding]
+
+   shared_encoding = Encoding.compatible?(@encoding, target_encoding)
+   return unless shared_encoding
+
+   @output.set_encoding(shared_encoding)
+   @output.seek(0, IO::SEEK_END)
+ end
+
+ # Wraps +field+ in the quote character, escaping embedded quote
+ # characters by doubling them.
+ #
+ # +field+ is converted with Kernel#String first, so +nil+ becomes an
+ # empty quoted field. The quote character is transcoded to the encoding
+ # of the field before use. Returns the quoted \String.
+ def quote_field(field)
+   field = String(field)
+   encoded_quote_character = @quote_character.encode(field.encoding)
+   doubled_quote_character = encoded_quote_character * 2
+   # The block form of gsub inserts the replacement verbatim. A replacement
+   # String treats backslashes as escapes, so a backslash quote character
+   # would be replaced by itself instead of being doubled.
+   escaped_field =
+     field.gsub(encoded_quote_character) { doubled_quote_character }
+   encoded_quote_character + escaped_field + encoded_quote_character
+ end
+
+ # Returns the text to write for +field+, the field at index +i+ of its row.
+ #
+ # Fields are quoted when quoting is forced globally or for index +i+.
+ # Otherwise +nil+ becomes an empty unquoted field, and a field is quoted
+ # only when it is empty (with quote_empty enabled) or contains a character
+ # that requires quoting.
+ def quote(field, i)
+   return quote_field(field) if @force_quotes
+   return quote_field(field) if @force_quotes_fields && @force_quotes_fields[i]
+   # represent +nil+ fields as empty unquoted fields
+   return "" if field.nil?
+
+   text = String(field) # Stringify fields
+   # represent empty fields as empty quoted fields
+   needs_quotes =
+     (@quote_empty && text.empty?) ||
+     (text.valid_encoding? && @quotable_pattern.match?(text))
+   needs_quotes ? quote_field(text) : text
+ end
+ end
+end