diff options
Diffstat (limited to 'lib/csv.rb')
| -rw-r--r-- | lib/csv.rb | 4320 |
1 files changed, 2427 insertions, 1893 deletions
diff --git a/lib/csv.rb b/lib/csv.rb index 54b820df31..170ab04c24 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -1,9 +1,8 @@ # encoding: US-ASCII +# frozen_string_literal: true # = csv.rb -- CSV Reading and Writing # -# Created by James Edward Gray II on 2005-10-31. -# Copyright 2005 James Edward Gray II. You can redistribute or modify this code -# under the terms of Ruby's license. +# Created by James Edward Gray II on 2005-10-31. # # See CSV for documentation. # @@ -11,18 +10,18 @@ # # Welcome to the new and improved CSV. # -# This version of the CSV library began its life as FasterCSV. FasterCSV was -# intended as a replacement to Ruby's then standard CSV library. It was +# This version of the CSV library began its life as FasterCSV. FasterCSV was +# intended as a replacement to Ruby's then standard CSV library. It was # designed to address concerns users of that library had and it had three # primary goals: # # 1. Be significantly faster than CSV while remaining a pure Ruby library. -# 2. Use a smaller and easier to maintain code base. (FasterCSV eventually -# grew larger, was also but considerably richer in features. The parsing +# 2. Use a smaller and easier to maintain code base. (FasterCSV eventually +# grew larger, was also but considerably richer in features. The parsing # core remains quite small.) # 3. Improve on the CSV interface. # -# Obviously, the last one is subjective. I did try to defer to the original +# Obviously, the last one is subjective. I did try to defer to the original # interface whenever I didn't have a compelling reason to change it though, so # hopefully this won't be too radically different. # @@ -30,26 +29,26 @@ # the original library as of Ruby 1.9. If you are migrating code from 1.8 or # earlier, you may have to change your code to comply with the new interface. # -# == What's Different From the Old CSV? +# == What's the Different From the Old CSV? 
# # I'm sure I'll miss something, but I'll try to mention most of the major # differences I am aware of, to help others quickly get up to speed: # -# === CSV Parsing +# === \CSV Parsing # -# * This parser is m17n aware. See CSV for full details. +# * This parser is m17n aware. See CSV for full details. # * This library has a stricter parser and will throw MalformedCSVErrors on # problematic data. -# * This library has a less liberal idea of a line ending than CSV. What you -# set as the <tt>:row_sep</tt> is law. It can auto-detect your line endings +# * This library has a less liberal idea of a line ending than CSV. What you +# set as the <tt>:row_sep</tt> is law. It can auto-detect your line endings # though. -# * The old library returned empty lines as <tt>[nil]</tt>. This library calls +# * The old library returned empty lines as <tt>[nil]</tt>. This library calls # them <tt>[]</tt>. # * This library has a much faster parser. # # === Interface # -# * CSV now uses Hash-style parameters to set options. +# * CSV now uses keyword parameters to set options. # * CSV no longer has generate_row() or parse_row(). # * The old CSV's Reader and Writer classes have been dropped. # * CSV::open() is now more like Ruby's open(). @@ -57,9 +56,9 @@ # * CSV now has a new() method used to wrap objects like String and IO for # reading and writing. # * CSV::generate() is different from the old method. -# * CSV no longer supports partial reads. It works line-by-line. +# * CSV no longer supports partial reads. It works line-by-line. # * CSV no longer allows the instance methods to override the separators for -# performance reasons. They must be set in the constructor. +# performance reasons. They must be set in the constructor. # # If you use this library and find yourself missing any functionality I have # trimmed, please {let me know}[mailto:james@grayproductions.net]. @@ -71,16 +70,16 @@ # == What is CSV, really? 
# # CSV maintains a pretty strict definition of CSV taken directly from -# {the RFC}[http://www.ietf.org/rfc/rfc4180.txt]. I relax the rules in only one -# place and that is to make using this library easier. CSV will parse all valid +# {the RFC}[http://www.ietf.org/rfc/rfc4180.txt]. I relax the rules in only one +# place and that is to make using this library easier. CSV will parse all valid # CSV. # -# What you don't want to do is feed CSV invalid data. Because of the way the +# What you don't want to do is to feed CSV invalid data. Because of the way the # CSV format works, it's common for a parser to need to read until the end of -# the file to be sure a field is invalid. This eats a lot of time and memory. +# the file to be sure a field is invalid. This consumes a lot of time and memory. # # Luckily, when working with invalid CSV, Ruby's built-in methods will almost -# always be superior in every way. For example, parsing non-quoted fields is as +# always be superior in every way. For example, parsing non-quoted fields is as # easy as: # # data.split(",") @@ -91,1837 +90,2598 @@ # with any questions. require "forwardable" -require "English" require "date" require "stringio" +require_relative "csv/fields_converter" +require_relative "csv/input_record_separator" +require_relative "csv/parser" +require_relative "csv/row" +require_relative "csv/table" +require_relative "csv/writer" + +# == \CSV +# +# === In a Hurry? +# +# If you are familiar with \CSV data and have a particular task in mind, +# you may want to go directly to the: +# - {Recipes for CSV}[doc/csv/recipes/recipes_rdoc.html]. +# +# Otherwise, read on here, about the API: classes, methods, and constants. +# +# === \CSV Data +# +# \CSV (comma-separated values) data is a text representation of a table: +# - A _row_ _separator_ delimits table rows. +# A common row separator is the newline character <tt>"\n"</tt>. +# - A _column_ _separator_ delimits fields in a row. 
+# A common column separator is the comma character <tt>","</tt>. +# +# This \CSV \String, with row separator <tt>"\n"</tt> +# and column separator <tt>","</tt>, +# has three rows and two columns: +# "foo,0\nbar,1\nbaz,2\n" +# +# Despite the name \CSV, a \CSV representation can use different separators. +# +# For more about tables, see the Wikipedia article +# "{Table (information)}[https://en.wikipedia.org/wiki/Table_(information)]", +# especially its section +# "{Simple table}[https://en.wikipedia.org/wiki/Table_(information)#Simple_table]" +# +# == \Class \CSV +# +# Class \CSV provides methods for: +# - Parsing \CSV data from a \String object, a \File (via its file path), or an \IO object. +# - Generating \CSV data to a \String object. +# +# To make \CSV available: +# require 'csv' +# +# All examples here assume that this has been done. +# +# == Keeping It Simple +# +# A \CSV object has dozens of instance methods that offer fine-grained control +# of parsing and generating \CSV data. +# For many needs, though, simpler approaches will do. +# +# This section summarizes the singleton methods in \CSV +# that allow you to parse and generate without explicitly +# creating \CSV objects. +# For details, follow the links. +# +# === Simple Parsing +# +# Parsing methods commonly return either of: +# - An \Array of Arrays of Strings: +# - The outer \Array is the entire "table". +# - Each inner \Array is a row. +# - Each \String is a field. +# - A CSV::Table object. For details, see +# {\CSV with Headers}[#class-CSV-label-CSV+with+Headers]. +# +# ==== Parsing a \String +# +# The input to be parsed can be a string: +# string = "foo,0\nbar,1\nbaz,2\n" +# +# \Method CSV.parse returns the entire \CSV data: +# CSV.parse(string) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] # -# This class provides a complete interface to CSV files and data. It offers -# tools to enable you to read and write to and from Strings or IO objects, as -# needed. 
+# \Method CSV.parse_line returns only the first row: +# CSV.parse_line(string) # => ["foo", "0"] # -# == Reading +# \CSV extends class \String with instance method String#parse_csv, +# which also returns only the first row: +# string.parse_csv # => ["foo", "0"] # -# === From a File +# ==== Parsing Via a \File Path # -# ==== A Line at a Time +# The input to be parsed can be in a file: +# string = "foo,0\nbar,1\nbaz,2\n" +# path = 't.csv' +# File.write(path, string) # -# CSV.foreach("path/to/file.csv") do |row| -# # use row here... +# \Method CSV.read returns the entire \CSV data: +# CSV.read(path) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] +# +# \Method CSV.foreach iterates, passing each row to the given block: +# CSV.foreach(path) do |row| +# p row +# end +# Output: +# ["foo", "0"] +# ["bar", "1"] +# ["baz", "2"] +# +# \Method CSV.table returns the entire \CSV data as a CSV::Table object: +# CSV.table(path) # => #<CSV::Table mode:col_or_row row_count:3> +# +# ==== Parsing from an Open \IO Stream +# +# The input to be parsed can be in an open \IO stream: +# +# \Method CSV.read returns the entire \CSV data: +# File.open(path) do |file| +# CSV.read(file) +# end # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] +# +# As does method CSV.parse: +# File.open(path) do |file| +# CSV.parse(file) +# end # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] +# +# \Method CSV.parse_line returns only the first row: +# File.open(path) do |file| +# CSV.parse_line(file) +# end # => ["foo", "0"] +# +# \Method CSV.foreach iterates, passing each row to the given block: +# File.open(path) do |file| +# CSV.foreach(file) do |row| +# p row +# end # end +# Output: +# ["foo", "0"] +# ["bar", "1"] +# ["baz", "2"] +# +# \Method CSV.table returns the entire \CSV data as a CSV::Table object: +# File.open(path) do |file| +# CSV.table(file) +# end # => #<CSV::Table mode:col_or_row row_count:3> # -# ==== All at Once +# === Simple Generating +# +# \Method CSV.generate returns a \String; +# this 
example uses method CSV#<< to append the rows +# that are to be generated: +# output_string = CSV.generate do |csv| +# csv << ['foo', 0] +# csv << ['bar', 1] +# csv << ['baz', 2] +# end +# output_string # => "foo,0\nbar,1\nbaz,2\n" # -# arr_of_arrs = CSV.read("path/to/file.csv") +# \Method CSV.generate_line returns a \String containing the single row +# constructed from an \Array: +# CSV.generate_line(['foo', '0']) # => "foo,0\n" # -# === From a String +# \CSV extends class \Array with instance method <tt>Array#to_csv</tt>, +# which forms an \Array into a \String: +# ['foo', '0'].to_csv # => "foo,0\n" # -# ==== A Line at a Time +# === "Filtering" \CSV # -# CSV.parse("CSV,data,String") do |row| -# # use row here... +# \Method CSV.filter provides a Unix-style filter for \CSV data. +# The input data is processed to form the output data: +# in_string = "foo,0\nbar,1\nbaz,2\n" +# out_string = '' +# CSV.filter(in_string, out_string) do |row| +# row[0] = row[0].upcase +# row[1] *= 4 # end +# out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n" +# +# == \CSV Objects +# +# There are three ways to create a \CSV object: +# - \Method CSV.new returns a new \CSV object. +# - \Method CSV.instance returns a new or cached \CSV object. +# - \Method \CSV() also returns a new or cached \CSV object. +# +# === Instance Methods +# +# \CSV has three groups of instance methods: +# - Its own internally defined instance methods. +# - Methods included by module Enumerable. +# - Methods delegated to class IO. See below. +# +# ==== Delegated Methods +# +# For convenience, a CSV object will delegate to many methods in class IO. +# (A few have wrapper "guard code" in \CSV.) You may call: +# * IO#binmode +# * #binmode? +# * IO#close +# * IO#close_read +# * IO#close_write +# * IO#closed? +# * #eof +# * #eof? 
+# * IO#external_encoding +# * IO#fcntl +# * IO#fileno +# * #flock +# * IO#flush +# * IO#fsync +# * IO#internal_encoding +# * #ioctl +# * IO#isatty +# * #path +# * IO#pid +# * IO#pos +# * IO#pos= +# * IO#reopen +# * #rewind +# * IO#seek +# * #stat +# * IO#string +# * IO#sync +# * IO#sync= +# * IO#tell +# * #to_i +# * #to_io +# * IO#truncate +# * IO#tty? +# +# === Options +# +# The default values for options are: +# DEFAULT_OPTIONS = { +# # For both parsing and generating. +# col_sep: ",", +# row_sep: :auto, +# quote_char: '"', +# # For parsing. +# field_size_limit: nil, +# converters: nil, +# unconverted_fields: nil, +# headers: false, +# return_headers: false, +# header_converters: nil, +# skip_blanks: false, +# skip_lines: nil, +# liberal_parsing: false, +# nil_value: nil, +# empty_value: "", +# strip: false, +# # For generating. +# write_headers: nil, +# quote_empty: true, +# force_quotes: false, +# write_converters: nil, +# write_nil_value: nil, +# write_empty_value: "", +# } +# +# ==== Options for Parsing +# +# Options for parsing, described in detail below, include: +# - +row_sep+: Specifies the row separator; used to delimit rows. +# - +col_sep+: Specifies the column separator; used to delimit fields. +# - +quote_char+: Specifies the quote character; used to quote fields. +# - +field_size_limit+: Specifies the maximum field size + 1 allowed. +# Deprecated since 3.2.3. Use +max_field_size+ instead. +# - +max_field_size+: Specifies the maximum field size allowed. +# - +converters+: Specifies the field converters to be used. +# - +unconverted_fields+: Specifies whether unconverted fields are to be available. +# - +headers+: Specifies whether data contains headers, +# or specifies the headers themselves. +# - +return_headers+: Specifies whether headers are to be returned. +# - +header_converters+: Specifies the header converters to be used. +# - +skip_blanks+: Specifies whether blanks lines are to be ignored. 
+# - +skip_lines+: Specifies how comment lines are to be recognized. +# - +strip+: Specifies whether leading and trailing whitespace are to be +# stripped from fields. This must be compatible with +col_sep+; if it is not, +# then an +ArgumentError+ exception will be raised. +# - +liberal_parsing+: Specifies whether \CSV should attempt to parse +# non-compliant data. +# - +nil_value+: Specifies the object that is to be substituted for each null (no-text) field. +# - +empty_value+: Specifies the object that is to be substituted for each empty field. # -# ==== All at Once +# :include: ../doc/csv/options/common/row_sep.rdoc # -# arr_of_arrs = CSV.parse("CSV,data,String") +# :include: ../doc/csv/options/common/col_sep.rdoc # -# == Writing +# :include: ../doc/csv/options/common/quote_char.rdoc # -# === To a File +# :include: ../doc/csv/options/parsing/field_size_limit.rdoc # -# CSV.open("path/to/file.csv", "wb") do |csv| -# csv << ["row", "of", "CSV", "data"] -# csv << ["another", "row"] -# # ... +# :include: ../doc/csv/options/parsing/converters.rdoc +# +# :include: ../doc/csv/options/parsing/unconverted_fields.rdoc +# +# :include: ../doc/csv/options/parsing/headers.rdoc +# +# :include: ../doc/csv/options/parsing/return_headers.rdoc +# +# :include: ../doc/csv/options/parsing/header_converters.rdoc +# +# :include: ../doc/csv/options/parsing/skip_blanks.rdoc +# +# :include: ../doc/csv/options/parsing/skip_lines.rdoc +# +# :include: ../doc/csv/options/parsing/strip.rdoc +# +# :include: ../doc/csv/options/parsing/liberal_parsing.rdoc +# +# :include: ../doc/csv/options/parsing/nil_value.rdoc +# +# :include: ../doc/csv/options/parsing/empty_value.rdoc +# +# ==== Options for Generating +# +# Options for generating, described in detail below, include: +# - +row_sep+: Specifies the row separator; used to delimit rows. +# - +col_sep+: Specifies the column separator; used to delimit fields. +# - +quote_char+: Specifies the quote character; used to quote fields. 
+# - +write_headers+: Specifies whether headers are to be written. +# - +force_quotes+: Specifies whether each output field is to be quoted. +# - +quote_empty+: Specifies whether each empty output field is to be quoted. +# - +write_converters+: Specifies the field converters to be used in writing. +# - +write_nil_value+: Specifies the object that is to be substituted for each +nil+-valued field. +# - +write_empty_value+: Specifies the object that is to be substituted for each empty field. +# +# :include: ../doc/csv/options/common/row_sep.rdoc +# +# :include: ../doc/csv/options/common/col_sep.rdoc +# +# :include: ../doc/csv/options/common/quote_char.rdoc +# +# :include: ../doc/csv/options/generating/write_headers.rdoc +# +# :include: ../doc/csv/options/generating/force_quotes.rdoc +# +# :include: ../doc/csv/options/generating/quote_empty.rdoc +# +# :include: ../doc/csv/options/generating/write_converters.rdoc +# +# :include: ../doc/csv/options/generating/write_nil_value.rdoc +# +# :include: ../doc/csv/options/generating/write_empty_value.rdoc +# +# === \CSV with Headers +# +# CSV allows you to specify the column names of a CSV file, whether they are in the data or +# provided separately. If headers are specified, reading methods return an instance +# of CSV::Table, consisting of CSV::Row objects. +# +# # Headers are part of data +# data = CSV.parse(<<~ROWS, headers: true) +# Name,Department,Salary +# Bob,Engineering,1000 +# Jane,Sales,2000 +# John,Management,5000 +# ROWS +# +# data.class #=> CSV::Table +# data.first #=> #<CSV::Row "Name":"Bob" "Department":"Engineering" "Salary":"1000"> +# data.first.to_h #=> {"Name"=>"Bob", "Department"=>"Engineering", "Salary"=>"1000"} +# +# # Headers provided by developer +# data = CSV.parse('Bob,Engineering,1000', headers: %i[name department salary]) +# data.first #=> #<CSV::Row name:"Bob" department:"Engineering" salary:"1000"> +# +# === \Converters +# +# By default, each value (field or header) parsed by \CSV is formed into a \String. 
+# You can use a _field_ _converter_ or _header_ _converter_ +# to intercept and modify the parsed values: +# - See {Field Converters}[#class-CSV-label-Field+Converters]. +# - See {Header Converters}[#class-CSV-label-Header+Converters]. +# +# Also by default, each value to be written during generation is written 'as-is'. +# You can use a _write_ _converter_ to modify values before writing. +# - See {Write Converters}[#class-CSV-label-Write+Converters]. +# +# ==== Specifying \Converters +# +# You can specify converters for parsing or generating in the +options+ +# argument to various \CSV methods: +# - Option +converters+ for converting parsed field values. +# - Option +header_converters+ for converting parsed header values. +# - Option +write_converters+ for converting values to be written (generated). +# +# There are three forms for specifying converters: +# - A converter proc: executable code to be used for conversion. +# - A converter name: the name of a stored converter. +# - A converter list: an array of converter procs, converter names, and converter lists. +# +# ===== Converter Procs +# +# This converter proc, +strip_converter+, accepts a value +field+ +# and returns <tt>field.strip</tt>: +# strip_converter = proc {|field| field.strip } +# In this call to <tt>CSV.parse</tt>, +# the keyword argument <tt>converters: strip_converter</tt> +# specifies that: +# - \Proc +strip_converter+ is to be called for each parsed field. +# - The converter's return value is to replace the +field+ value. +# Example: +# string = " foo , 0 \n bar , 1 \n baz , 2 \n" +# array = CSV.parse(string, converters: strip_converter) +# array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] +# +# A converter proc can receive a second argument, +field_info+, +# that contains details about the field. 
+# This modified +strip_converter+ displays its arguments: +# strip_converter = proc do |field, field_info| +# p [field, field_info] +# field.strip # end +# string = " foo , 0 \n bar , 1 \n baz , 2 \n" +# array = CSV.parse(string, converters: strip_converter) +# array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] +# Output: +# [" foo ", #<struct CSV::FieldInfo index=0, line=1, header=nil>] +# [" 0 ", #<struct CSV::FieldInfo index=1, line=1, header=nil>] +# [" bar ", #<struct CSV::FieldInfo index=0, line=2, header=nil>] +# [" 1 ", #<struct CSV::FieldInfo index=1, line=2, header=nil>] +# [" baz ", #<struct CSV::FieldInfo index=0, line=3, header=nil>] +# [" 2 ", #<struct CSV::FieldInfo index=1, line=3, header=nil>] +# Each CSV::FieldInfo object shows: +# - The 0-based field index. +# - The 1-based line index. +# - The field header, if any. +# +# ===== Stored \Converters +# +# A converter may be given a name and stored in a structure where +# the parsing methods can find it by name. +# +# The storage structure for field converters is the \Hash CSV::Converters. +# It has several built-in converter procs: +# - <tt>:integer</tt>: converts each \String-embedded integer into a true \Integer. +# - <tt>:float</tt>: converts each \String-embedded float into a true \Float. +# - <tt>:date</tt>: converts each \String-embedded date into a true \Date. +# - <tt>:date_time</tt>: converts each \String-embedded date-time into a true \DateTime. +# +# This example creates a converter proc, then stores it: +# strip_converter = proc {|field| field.strip } +# CSV::Converters[:strip] = strip_converter +# Then the parsing method call can refer to the converter +# by its name, <tt>:strip</tt>: +# string = " foo , 0 \n bar , 1 \n baz , 2 \n" +# array = CSV.parse(string, converters: :strip) +# array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] +# +# The storage structure for header converters is the \Hash CSV::HeaderConverters, +# which works in the same way. 
+# It also has built-in converter procs: +# - <tt>:downcase</tt>: Downcases each header. +# - <tt>:symbol</tt>: Converts each header to a \Symbol. +# +# There is no such storage structure for write converters. +# +# In order for the parsing methods to access stored converters in non-main-Ractors, the +# storage structure must be made shareable first. +# Therefore, <tt>Ractor.make_shareable(CSV::Converters)</tt> and +# <tt>Ractor.make_shareable(CSV::HeaderConverters)</tt> must be called before the creation +# of Ractors that use the converters stored in these structures. (Since making the storage +# structures shareable involves freezing them, any custom converters that are to be used +# must be added first.) # -# === To a String +# ===== Converter Lists # -# csv_string = CSV.generate do |csv| -# csv << ["row", "of", "CSV", "data"] -# csv << ["another", "row"] -# # ... +# A _converter_ _list_ is an \Array that may include any assortment of: +# - Converter procs. +# - Names of stored converters. +# - Nested converter lists. 
+# +# Examples: +# numeric_converters = [:integer, :float] +# date_converters = [:date, :date_time] +# [numeric_converters, strip_converter] +# [strip_converter, date_converters, :float] +# +# Like a converter proc, a converter list may be named and stored in either +# \CSV::Converters or CSV::HeaderConverters: +# CSV::Converters[:custom] = [strip_converter, date_converters, :float] +# CSV::HeaderConverters[:custom] = [:downcase, :symbol] +# +# There are two built-in converter lists: +# CSV::Converters[:numeric] # => [:integer, :float] +# CSV::Converters[:all] # => [:date_time, :numeric] +# +# ==== Field \Converters +# +# With no conversion, all parsed fields in all rows become Strings: +# string = "foo,0\nbar,1\nbaz,2\n" +# ary = CSV.parse(string) +# ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] +# +# When you specify a field converter, each parsed field is passed to the converter; +# its return value becomes the stored value for the field. +# A converter might, for example, convert an integer embedded in a \String +# into a true \Integer. +# (In fact, that's what built-in field converter +:integer+ does.) +# +# There are three ways to use field \converters. +# +# - Using option {converters}[#class-CSV-label-Option+converters] with a parsing method: +# ary = CSV.parse(string, converters: :integer) +# ary # => [["foo", 0], ["bar", 1], ["baz", 2]] +# - Using option {converters}[#class-CSV-label-Option+converters] with a new \CSV instance: +# csv = CSV.new(string, converters: :integer) +# # Field converters in effect: +# csv.converters # => [:integer] +# csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]] +# - Using method #convert to add a field converter to a \CSV instance: +# csv = CSV.new(string) +# # Add a converter. 
+# csv.convert(:integer) +# csv.converters # => [:integer] +# csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]] +# +# Installing a field converter does not affect already-read rows: +# csv = CSV.new(string) +# csv.shift # => ["foo", "0"] +# # Add a converter. +# csv.convert(:integer) +# csv.converters # => [:integer] +# csv.read # => [["bar", 1], ["baz", 2]] +# +# There are additional built-in \converters, and custom \converters are also supported. +# +# ===== Built-In Field \Converters +# +# The built-in field converters are in \Hash CSV::Converters: +# - Each key is a field converter name. +# - Each value is one of: +# - A \Proc field converter. +# - An \Array of field converter names. +# +# Display: +# CSV::Converters.each_pair do |name, value| +# if value.kind_of?(Proc) +# p [name, value.class] +# else +# p [name, value] +# end # end +# Output: +# [:integer, Proc] +# [:float, Proc] +# [:numeric, [:integer, :float]] +# [:date, Proc] +# [:date_time, Proc] +# [:all, [:date_time, :numeric]] +# +# Each of these converters transcodes values to UTF-8 before attempting conversion. +# If a value cannot be transcoded to UTF-8 the conversion will +# fail and the value will remain unconverted. +# +# Converter +:integer+ converts each field that Integer() accepts: +# data = '0,1,2,x' +# # Without the converter +# csv = CSV.parse_line(data) +# csv # => ["0", "1", "2", "x"] +# # With the converter +# csv = CSV.parse_line(data, converters: :integer) +# csv # => [0, 1, 2, "x"] +# +# Converter +:float+ converts each field that Float() accepts: +# data = '1.0,3.14159,x' +# # Without the converter +# csv = CSV.parse_line(data) +# csv # => ["1.0", "3.14159", "x"] +# # With the converter +# csv = CSV.parse_line(data, converters: :float) +# csv # => [1.0, 3.14159, "x"] +# +# Converter +:numeric+ converts with both +:integer+ and +:float+. 
+# +# Converter +:date+ converts each field that Date::parse accepts: +# data = '2001-02-03,x' +# # Without the converter +# csv = CSV.parse_line(data) +# csv # => ["2001-02-03", "x"] +# # With the converter +# csv = CSV.parse_line(data, converters: :date) +# csv # => [#<Date: 2001-02-03 ((2451944j,0s,0n),+0s,2299161j)>, "x"] +# +# Converter +:date_time+ converts each field that DateTime::parse accepts: +# data = '2020-05-07T14:59:00-05:00,x' +# # Without the converter +# csv = CSV.parse_line(data) +# csv # => ["2020-05-07T14:59:00-05:00", "x"] +# # With the converter +# csv = CSV.parse_line(data, converters: :date_time) +# csv # => [#<DateTime: 2020-05-07T14:59:00-05:00 ((2458977j,71940s,0n),-18000s,2299161j)>, "x"] +# +# Converter +:all+ converts with both +:date_time+ and +:numeric+. +# +# As seen above, method #convert adds \converters to a \CSV instance, +# and method #converters returns an \Array of the \converters in effect: +# csv = CSV.new('0,1,2') +# csv.converters # => [] +# csv.convert(:integer) +# csv.converters # => [:integer] +# csv.convert(:date) +# csv.converters # => [:integer, :date] # -# == Convert a Single Line +# ===== Custom Field \Converters # -# csv_string = ["CSV", "data"].to_csv # to CSV -# csv_array = "CSV,String".parse_csv # from CSV +# You can define a custom field converter: +# strip_converter = proc {|field| field.strip } +# string = " foo , 0 \n bar , 1 \n baz , 2 \n" +# array = CSV.parse(string, converters: strip_converter) +# array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] +# You can register the converter in \Converters \Hash, +# which allows you to refer to it by name: +# CSV::Converters[:strip] = strip_converter +# string = " foo , 0 \n bar , 1 \n baz , 2 \n" +# array = CSV.parse(string, converters: :strip) +# array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] # -# == Shortcut Interface # -# CSV { |csv_out| csv_out << %w{my data here} } # to $stdout -# CSV(csv = "") { |csv_str| csv_str 
<< %w{my data here} } # to a String -# CSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr -# CSV($stdin) { |csv_in| csv_in.each { |row| p row } } # from $stdin +# Header converters operate only on headers (and not on other rows). # -# == Advanced Usage +# There are three ways to use header \converters; +# these examples use built-in header converter +:downcase+, +# which downcases each parsed header. # -# === Wrap an IO Object +# - Option +header_converters+ with a singleton parsing method: +# string = "Name,Count\nFoo,0\n,Bar,1\nBaz,2" +# tbl = CSV.parse(string, headers: true, header_converters: :downcase) +# tbl.class # => CSV::Table +# tbl.headers # => ["name", "count"] # -# csv = CSV.new(io, options) -# # ... read (with gets() or each()) from and write (with <<) to csv here ... +# - Option +header_converters+ with a new \CSV instance: +# csv = CSV.new(string, header_converters: :downcase) +# # Header converters in effect: +# csv.header_converters # => [:downcase] +# tbl = CSV.parse(string, headers: true) +# tbl.headers # => ["Name", "Count"] # -# == CSV and Character Encodings (M17n or Multilingualization) +# - Method #header_convert adds a header converter to a \CSV instance: +# csv = CSV.new(string) +# # Add a header converter. +# csv.header_convert(:downcase) +# csv.header_converters # => [:downcase] +# tbl = CSV.parse(string, headers: true) +# tbl.headers # => ["Name", "Count"] +# +# ===== Built-In Header \Converters +# +# The built-in header \converters are in \Hash CSV::HeaderConverters. 
+# The keys there are the names of the \converters: +# CSV::HeaderConverters.keys # => [:downcase, :symbol] +# +# Converter +:downcase+ converts each header by downcasing it: +# string = "Name,Count\nFoo,0\n,Bar,1\nBaz,2" +# tbl = CSV.parse(string, headers: true, header_converters: :downcase) +# tbl.class # => CSV::Table +# tbl.headers # => ["name", "count"] +# +# Converter +:symbol+ converts each header by making it into a \Symbol: +# string = "Name,Count\nFoo,0\n,Bar,1\nBaz,2" +# tbl = CSV.parse(string, headers: true, header_converters: :symbol) +# tbl.headers # => [:name, :count] +# Details: +# - Strips leading and trailing whitespace. +# - Downcases the header. +# - Replaces embedded spaces with underscores. +# - Removes non-word characters. +# - Makes the string into a \Symbol. +# +# ===== Custom Header \Converters +# +# You can define a custom header converter: +# upcase_converter = proc {|header| header.upcase } +# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" +# table = CSV.parse(string, headers: true, header_converters: upcase_converter) +# table # => #<CSV::Table mode:col_or_row row_count:4> +# table.headers # => ["NAME", "VALUE"] +# You can register the converter in \HeaderConverters \Hash, +# which allows you to refer to it by name: +# CSV::HeaderConverters[:upcase] = upcase_converter +# table = CSV.parse(string, headers: true, header_converters: :upcase) +# table # => #<CSV::Table mode:col_or_row row_count:4> +# table.headers # => ["NAME", "VALUE"] +# +# ===== Write \Converters +# +# When you specify a write converter for generating \CSV, +# each field to be written is passed to the converter; +# its return value becomes the new value for the field. +# A converter might, for example, strip whitespace from a field. 
+# +# Using no write converter (all fields unmodified): +# output_string = CSV.generate do |csv| +# csv << [' foo ', 0] +# csv << [' bar ', 1] +# csv << [' baz ', 2] +# end +# output_string # => " foo ,0\n bar ,1\n baz ,2\n" +# Using option +write_converters+ with two custom write converters: +# strip_converter = proc {|field| field.respond_to?(:strip) ? field.strip : field } +# upcase_converter = proc {|field| field.respond_to?(:upcase) ? field.upcase : field } +# write_converters = [strip_converter, upcase_converter] +# output_string = CSV.generate(write_converters: write_converters) do |csv| +# csv << [' foo ', 0] +# csv << [' bar ', 1] +# csv << [' baz ', 2] +# end +# output_string # => "FOO,0\nBAR,1\nBAZ,2\n" +# +# === Character Encodings (M17n or Multilingualization) # # This new CSV parser is m17n savvy. The parser works in the Encoding of the IO -# or String object being read from or written to. Your data is never transcoded +# or String object being read from or written to. Your data is never transcoded # (unless you ask Ruby to transcode it for you) and will literally be parsed in -# the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the -# Encoding of your data. This is accomplished by transcoding the parser itself +# the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the +# Encoding of your data. This is accomplished by transcoding the parser itself # into your Encoding. # # Some transcoding must take place, of course, to accomplish this multiencoding -# support. For example, <tt>:col_sep</tt>, <tt>:row_sep</tt>, and +# support. For example, <tt>:col_sep</tt>, <tt>:row_sep</tt>, and # <tt>:quote_char</tt> must be transcoded to match your data. Hopefully this # makes the entire process feel transparent, since CSV's defaults should just -# magically work for you data. However, you can set these values manually in +# magically work for your data. 
However, you can set these values manually in # the target Encoding to avoid the translation. # # It's also important to note that while all of CSV's core parser is now -# Encoding agnostic, some features are not. For example, the built-in +# Encoding agnostic, some features are not. For example, the built-in # converters will try to transcode data to UTF-8 before making conversions. # Again, you can provide custom converters that are aware of your Encodings to -# avoid this translation. It's just too hard for me to support native +# avoid this translation. It's just too hard for me to support native # conversions in all of Ruby's Encodings. # -# Anyway, the practical side of this is simple: make sure IO and String objects +# Anyway, the practical side of this is simple: make sure IO and String objects # passed into CSV have the proper Encoding set and everything should just work. # CSV methods that allow you to open IO objects (CSV::foreach(), CSV::open(), # CSV::read(), and CSV::readlines()) do allow you to specify the Encoding. # # One minor exception comes when generating CSV into a String with an Encoding -# that is not ASCII compatible. There's no existing data for CSV to use to +# that is not ASCII compatible. There's no existing data for CSV to use to # prepare itself and thus you will probably need to manually specify the desired -# Encoding for most of those cases. It will try to guess using the fields in a +# Encoding for most of those cases. It will try to guess using the fields in a # row of output though, when using CSV::generate_line() or Array#to_csv(). # # I try to point out any other Encoding issues in the documentation of methods # as they come up. # # This has been tested to the best of my ability with all non-"dummy" Encodings -# Ruby ships with. However, it is brave new code and may have some bugs. +# Ruby ships with. However, it is brave new code and may have some bugs. 
# Please feel free to {report}[mailto:james@grayproductions.net] any issues you # find with it. # class CSV - # The version of the installed library. - VERSION = "2.4.8".freeze + + # The error thrown when the parser encounters illegal CSV formatting. + class MalformedCSVError < RuntimeError + attr_reader :line_number + alias_method :lineno, :line_number + def initialize(message, line_number) + @line_number = line_number + super("#{message} in line #{line_number}.") + end + end # - # A CSV::Row is part Array and part Hash. It retains an order for the fields - # and allows duplicates just as an Array would, but also allows you to access - # fields by name just as you could if they were in a Hash. + # A FieldInfo Struct contains details about a field's position in the data + # source it was read from. CSV will pass this Struct to some blocks that make + # decisions based on field structure. See CSV.convert_fields() for an + # example. # - # All rows returned by CSV will be constructed from this class, if header row - # processing is activated. + # <b><tt>index</tt></b>:: The zero-based index of the field in its row. + # <b><tt>line</tt></b>:: The line of the data source this row is from. + # <b><tt>header</tt></b>:: The header for the column, when available. + # <b><tt>quoted?</tt></b>:: True or false, whether the original value is quoted or not. # - class Row - # - # Construct a new CSV::Row from +headers+ and +fields+, which are expected - # to be Arrays. If one Array is shorter than the other, it will be padded - # with +nil+ objects. - # - # The optional +header_row+ parameter can be set to +true+ to indicate, via - # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header - # row. Otherwise, the row is assumes to be a field row. 
- # - # A CSV::Row object supports the following Array methods through delegation: - # - # * empty?() - # * length() - # * size() - # - def initialize(headers, fields, header_row = false) - @header_row = header_row - headers.each { |h| h.freeze if h.is_a? String } - - # handle extra headers or fields - @row = if headers.size >= fields.size - headers.zip(fields) - else - fields.zip(headers).map { |pair| pair.reverse! } - end - end + FieldInfo = Struct.new(:index, :line, :header, :quoted?) - # Internal data format used to compare equality. - attr_reader :row - protected :row - - ### Array Delegation ### - - extend Forwardable - def_delegators :@row, :empty?, :length, :size + # A Regexp used to find and convert some common Date formats. + DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} | + \d{4}-\d{2}-\d{2} )\z /x + # A Regexp used to find and convert some common DateTime formats. + DateTimeMatcher = + / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} | + # ISO-8601 and RFC-3339 (space instead of T) recognized by DateTime.parse + \d{4}-\d{2}-\d{2} + (?:[T\s]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)? + )\z /x - # Returns +true+ if this is a header row. - def header_row? - @header_row - end + # The encoding used by all converters. + ConverterEncoding = Encoding.find("UTF-8") - # Returns +true+ if this is a field row. - def field_row? - not header_row? - end + # A \Hash containing the names and \Procs for the built-in field converters. + # See {Built-In Field Converters}[#class-CSV-label-Built-In+Field+Converters]. + # + # This \Hash is intentionally left unfrozen, and may be extended with + # custom field converters. + # See {Custom Field Converters}[#class-CSV-label-Custom+Field+Converters]. 
+ Converters = { + integer: lambda { |f| + Integer(f.encode(ConverterEncoding)) rescue f + }, + float: lambda { |f| + Float(f.encode(ConverterEncoding)) rescue f + }, + numeric: [:integer, :float], + date: lambda { |f| + begin + e = f.encode(ConverterEncoding) + e.match?(DateMatcher) ? Date.parse(e) : f + rescue # encoding conversion or date parse errors + f + end + }, + date_time: lambda { |f| + begin + e = f.encode(ConverterEncoding) + e.match?(DateTimeMatcher) ? DateTime.parse(e) : f + rescue # encoding conversion or date parse errors + f + end + }, + all: [:date_time, :numeric], + } - # Returns the headers of this row. - def headers - @row.map { |pair| pair.first } - end + # A \Hash containing the names and \Procs for the built-in header converters. + # See {Built-In Header Converters}[#class-CSV-label-Built-In+Header+Converters]. + # + # This \Hash is intentionally left unfrozen, and may be extended with + # custom field converters. + # See {Custom Header Converters}[#class-CSV-label-Custom+Header+Converters]. + HeaderConverters = { + downcase: lambda { |h| h.encode(ConverterEncoding).downcase }, + symbol: lambda { |h| + h.encode(ConverterEncoding).downcase.gsub(/[^\s\w]+/, "").strip. + gsub(/\s+/, "_").to_sym + }, + symbol_raw: lambda { |h| h.encode(ConverterEncoding).to_sym } + } - # + # Default values for method options. + DEFAULT_OPTIONS = { + # For both parsing and generating. + col_sep: ",", + row_sep: :auto, + quote_char: '"', + # For parsing. + field_size_limit: nil, + max_field_size: nil, + converters: nil, + unconverted_fields: nil, + headers: false, + return_headers: false, + header_converters: nil, + skip_blanks: false, + skip_lines: nil, + liberal_parsing: false, + nil_value: nil, + empty_value: "", + strip: false, + # For generating. 
+ write_headers: nil, + quote_empty: true, + force_quotes: false, + write_converters: nil, + write_nil_value: nil, + write_empty_value: "", + }.freeze + + class << self # :call-seq: - # field( header ) - # field( header, offset ) - # field( index ) - # - # This method will return the field value by +header+ or +index+. If a field - # is not found, +nil+ is returned. - # - # When provided, +offset+ ensures that a header match occurs on or later - # than the +offset+ index. You can use this to find duplicate headers, - # without resorting to hard-coding exact indices. - # - def field(header_or_index, minimum_index = 0) - # locate the pair - finder = header_or_index.is_a?(Integer) ? :[] : :assoc - pair = @row[minimum_index..-1].send(finder, header_or_index) - - # return the field if we have a pair - pair.nil? ? nil : pair.last + # instance(string, **options) + # instance(io = $stdout, **options) + # instance(string, **options) {|csv| ... } + # instance(io = $stdout, **options) {|csv| ... } + # + # Creates or retrieves cached \CSV objects. + # For arguments and options, see CSV.new. + # + # This API is not Ractor-safe. + # + # --- + # + # With no block given, returns a \CSV object. + # + # The first call to +instance+ creates and caches a \CSV object: + # s0 = 's0' + # csv0 = CSV.instance(s0) + # csv0.class # => CSV + # + # Subsequent calls to +instance+ with that _same_ +string+ or +io+ + # retrieve that same cached object: + # csv1 = CSV.instance(s0) + # csv1.class # => CSV + # csv1.equal?(csv0) # => true # Same CSV object + # + # A subsequent call to +instance+ with a _different_ +string+ or +io+ + # creates and caches a _different_ \CSV object. 
+ # s1 = 's1' + # csv2 = CSV.instance(s1) + # csv2.equal?(csv0) # => false # Different CSV object + # + # All the cached objects remain available: + # csv3 = CSV.instance(s0) + # csv3.equal?(csv0) # => true # Same CSV object + # csv4 = CSV.instance(s1) + # csv4.equal?(csv2) # => true # Same CSV object + # + # --- + # + # When a block is given, calls the block with the created or retrieved + # \CSV object; returns the block's return value: + # CSV.instance(s0) {|csv| :foo } # => :foo + def instance(data = $stdout, **options) + # create a _signature_ for this method call, data object and options + sig = [data.object_id] + + options.values_at(*DEFAULT_OPTIONS.keys.sort_by { |sym| sym.to_s }) + + # fetch or create the instance for this signature + @@instances ||= Hash.new + instance = (@@instances[sig] ||= new(data, **options)) + + if block_given? + yield instance # run block, if given, returning result + else + instance # or return the instance + end end - alias_method :[], :field - # # :call-seq: - # fetch( header ) - # fetch( header ) { |row| ... } - # fetch( header, default ) - # - # This method will fetch the field value by +header+. It has the same - # behavior as Hash#fetch: if there is a field with the given +header+, its - # value is returned. Otherwise, if a block is given, it is yielded the - # +header+ and its result is returned; if a +default+ is given as the - # second argument, it is returned; otherwise a KeyError is raised. - # - def fetch(header, *varargs) - raise ArgumentError, "Too many arguments" if varargs.length > 1 - pair = @row.assoc(header) - if pair - pair.last - else - if block_given? - yield header - elsif varargs.empty? - raise KeyError, "key not found: #{header}" + # filter(in_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table + # filter(in_string_or_io, out_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table + # filter(**options) {|row| ...
} -> array_of_arrays or csv_table + # + # - Parses \CSV from a source (\String, \IO stream, or ARGF). + # - Calls the given block with each parsed row: + # - Without headers, each row is an \Array. + # - With headers, each row is a CSV::Row. + # - Generates \CSV to an output (\String, \IO stream, or STDOUT). + # - Returns the parsed source: + # - Without headers, an \Array of \Arrays. + # - With headers, a CSV::Table. + # + # When +in_string_or_io+ is given, but not +out_string_or_io+, + # parses from the given +in_string_or_io+ + # and generates to STDOUT. + # + # \String input without headers: + # + # in_string = "foo,0\nbar,1\nbaz,2" + # CSV.filter(in_string) do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]] + # + # Output (to STDOUT): + # + # FOO,0 + # BAR,-1 + # BAZ,-2 + # + # \String input with headers: + # + # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2" + # CSV.filter(in_string, headers: true) do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end # => #<CSV::Table mode:col_or_row row_count:4> + # + # Output (to STDOUT): + # + # Name,Value + # FOO,0 + # BAR,-1 + # BAZ,-2 + # + # \IO stream input without headers: + # + # File.write('t.csv', "foo,0\nbar,1\nbaz,2") + # File.open('t.csv') do |in_io| + # CSV.filter(in_io) do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end + # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]] + # + # Output (to STDOUT): + # + # FOO,0 + # BAR,-1 + # BAZ,-2 + # + # \IO stream input with headers: + # + # File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2") + # File.open('t.csv') do |in_io| + # CSV.filter(in_io, headers: true) do |row| + # row[0].upcase! 
+ # row[1] = - row[1].to_i + # end + # end # => #<CSV::Table mode:col_or_row row_count:4> + # + # Output (to STDOUT): + # + # Name,Value + # FOO,0 + # BAR,-1 + # BAZ,-2 + # + # When both +in_string_or_io+ and +out_string_or_io+ are given, + # parses from +in_string_or_io+ and generates to +out_string_or_io+. + # + # \String output without headers: + # + # in_string = "foo,0\nbar,1\nbaz,2" + # out_string = '' + # CSV.filter(in_string, out_string) do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]] + # out_string # => "FOO,0\nBAR,-1\nBAZ,-2\n" + # + # \String output with headers: + # + # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2" + # out_string = '' + # CSV.filter(in_string, out_string, headers: true) do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end # => #<CSV::Table mode:col_or_row row_count:4> + # out_string # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n" + # + # \IO stream output without headers: + # + # in_string = "foo,0\nbar,1\nbaz,2" + # File.open('t.csv', 'w') do |out_io| + # CSV.filter(in_string, out_io) do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end + # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]] + # File.read('t.csv') # => "FOO,0\nBAR,-1\nBAZ,-2\n" + # + # \IO stream output with headers: + # + # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2" + # File.open('t.csv', 'w') do |out_io| + # CSV.filter(in_string, out_io, headers: true) do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end + # end # => #<CSV::Table mode:col_or_row row_count:4> + # File.read('t.csv') # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n" + # + # When neither +in_string_or_io+ nor +out_string_or_io+ given, + # parses from {ARGF}[https://docs.ruby-lang.org/en/master/ARGF.html] + # and generates to STDOUT. + # + # Without headers: + # + # # Put Ruby code into a file. + # ruby = <<-EOT + # require 'csv' + # CSV.filter do |row| + # row[0].upcase! 
+ # row[1] = - row[1].to_i + # end + # EOT + # File.write('t.rb', ruby) + # # Put some CSV into a file. + # File.write('t.csv', "foo,0\nbar,1\nbaz,2") + # # Run the Ruby code with CSV filename as argument. + # system(Gem.ruby, "t.rb", "t.csv") + # + # Output (to STDOUT): + # + # FOO,0 + # BAR,-1 + # BAZ,-2 + # + # With headers: + # + # # Put Ruby code into a file. + # ruby = <<-EOT + # require 'csv' + # CSV.filter(headers: true) do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end + # EOT + # File.write('t.rb', ruby) + # # Put some CSV into a file. + # File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2") + # # Run the Ruby code with CSV filename as argument. + # system(Gem.ruby, "t.rb", "t.csv") + # + # Output (to STDOUT): + # + # Name,Value + # FOO,0 + # BAR,-1 + # BAZ,-2 + # + # Arguments: + # + # * Argument +in_string_or_io+ must be a \String or an \IO stream. + # * Argument +out_string_or_io+ must be a \String or an \IO stream. + # * Arguments <tt>**options</tt> must be keyword options. + # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]. + def filter(input=nil, output=nil, **options) + # parse options for input, output, or both + in_options, out_options = Hash.new, {row_sep: InputRecordSeparator.value} + options.each do |key, value| + case key.to_s + when /\Ain(?:put)?_(.+)\Z/ + in_options[$1.to_sym] = value + when /\Aout(?:put)?_(.+)\Z/ + out_options[$1.to_sym] = value else - varargs.first + in_options[key] = value + out_options[key] = value end end - end - # Returns +true+ if there is a field with the given +header+. 
- def has_key?(header) - !!@row.assoc(header) + # build input and output wrappers + input = new(input || ARGF, **in_options) + output = new(output || $stdout, **out_options) + + # process headers + need_manual_header_output = + (in_options[:headers] and + out_options[:headers] == true and + out_options[:write_headers]) + if need_manual_header_output + first_row = input.shift + if first_row + if first_row.is_a?(Row) + headers = first_row.headers + yield headers + output << headers + end + yield first_row + output << first_row + end + end + + # read, yield, write + input.each do |row| + yield row + output << row + end end - alias_method :include?, :has_key? - alias_method :key?, :has_key? - alias_method :member?, :has_key? # # :call-seq: - # []=( header, value ) - # []=( header, offset, value ) - # []=( index, value ) - # - # Looks up the field by the semantics described in CSV::Row.field() and - # assigns the +value+. - # - # Assigning past the end of the row with an index will set all pairs between - # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new - # pair. - # - def []=(*args) - value = args.pop - - if args.first.is_a? Integer - if @row[args.first].nil? # extending past the end with index - @row[args.first] = [nil, value] - @row.map! { |pair| pair.nil? ? [nil, nil] : pair } - else # normal index assignment - @row[args.first][1] = value - end - else - index = index(*args) - if index.nil? # appending a field - self << [args.first, value] - else # normal header assignment - @row[index][1] = value - end + # foreach(path_or_io, mode='r', **options) {|row| ... ) + # foreach(path_or_io, mode='r', **options) -> new_enumerator + # + # Calls the block with each row read from source +path_or_io+. 
+ # + # \Path input without headers: + # + # string = "foo,0\nbar,1\nbaz,2\n" + # in_path = 't.csv' + # File.write(in_path, string) + # CSV.foreach(in_path) {|row| p row } + # + # Output: + # + # ["foo", "0"] + # ["bar", "1"] + # ["baz", "2"] + # + # \Path input with headers: + # + # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # in_path = 't.csv' + # File.write(in_path, string) + # CSV.foreach(in_path, headers: true) {|row| p row } + # + # Output: + # + # <CSV::Row "Name":"foo" "Value":"0"> + # <CSV::Row "Name":"bar" "Value":"1"> + # <CSV::Row "Name":"baz" "Value":"2"> + # + # \IO stream input without headers: + # + # string = "foo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # File.open('t.csv') do |in_io| + # CSV.foreach(in_io) {|row| p row } + # end + # + # Output: + # + # ["foo", "0"] + # ["bar", "1"] + # ["baz", "2"] + # + # \IO stream input with headers: + # + # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # File.open('t.csv') do |in_io| + # CSV.foreach(in_io, headers: true) {|row| p row } + # end + # + # Output: + # + # <CSV::Row "Name":"foo" "Value":"0"> + # <CSV::Row "Name":"bar" "Value":"1"> + # <CSV::Row "Name":"baz" "Value":"2"> + # + # With no block given, returns an \Enumerator: + # + # string = "foo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # CSV.foreach(path) # => #<Enumerator: CSV:foreach("t.csv", "r")> + # + # Arguments: + # * Argument +path_or_io+ must be a file path or an \IO stream. + # * Argument +mode+, if given, must be a \File mode + # See {Open Mode}[https://ruby-doc.org/core/IO.html#method-c-new-label-Open+Mode]. + # * Arguments <tt>**options</tt> must be keyword options. + # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]. + # * This method optionally accepts an additional <tt>:encoding</tt> option + # that you can use to specify the Encoding of the data read from +path+ or +io+. 
+ # You must provide this unless your data is in the encoding + # given by <tt>Encoding::default_external</tt>. + # Parsing will use this to determine how to parse the data. + # You may provide a second Encoding to + # have the data transcoded as it is read. For example, + # encoding: 'UTF-32BE:UTF-8' + # would read +UTF-32BE+ data from the file + # but transcode it to +UTF-8+ before parsing. + def foreach(path, mode="r", **options, &block) + return to_enum(__method__, path, mode, **options) unless block_given? + open(path, mode, **options) do |csv| + csv.each(&block) end end # # :call-seq: - # <<( field ) - # <<( header_and_field_array ) - # <<( header_and_field_hash ) - # - # If a two-element Array is provided, it is assumed to be a header and field - # and the pair is appended. A Hash works the same way with the key being - # the header and the value being the field. Anything else is assumed to be - # a lone field which is appended with a +nil+ header. - # - # This method returns the row for chaining. - # - def <<(arg) - if arg.is_a?(Array) and arg.size == 2 # appending a header and name - @row << arg - elsif arg.is_a?(Hash) # append header and name pairs - arg.each { |pair| @row << pair } - else # append field value - @row << [nil, arg] + # generate(csv_string, **options) {|csv| ... } + # generate(**options) {|csv| ... } + # + # * Argument +csv_string+, if given, must be a \String object; + # defaults to a new empty \String. + # * Arguments +options+, if given, should be generating options. + # See {Options for Generating}[#class-CSV-label-Options+for+Generating]. + # + # --- + # + # Creates a new \CSV object via <tt>CSV.new(csv_string, **options)</tt>; + # calls the block with the \CSV object, which the block may modify; + # returns the \String generated from the \CSV object. + # + # Note that a passed \String *is* modified by this method. + # Pass <tt>csv_string</tt>.dup if the \String must be preserved. 
+ # + # This method has one additional option: <tt>:encoding</tt>, + # which sets the base Encoding for the output if no +str+ is specified. + # CSV needs this hint if you plan to output non-ASCII compatible data. + # + # --- + # + # Add lines: + # input_string = "foo,0\nbar,1\nbaz,2\n" + # output_string = CSV.generate(input_string) do |csv| + # csv << ['bat', 3] + # csv << ['bam', 4] + # end + # output_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n" + # input_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n" + # output_string.equal?(input_string) # => true # Same string, modified + # + # Add lines into new string, preserving old string: + # input_string = "foo,0\nbar,1\nbaz,2\n" + # output_string = CSV.generate(input_string.dup) do |csv| + # csv << ['bat', 3] + # csv << ['bam', 4] + # end + # output_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n" + # input_string # => "foo,0\nbar,1\nbaz,2\n" + # output_string.equal?(input_string) # => false # Different strings + # + # Create lines from nothing: + # output_string = CSV.generate do |csv| + # csv << ['foo', 0] + # csv << ['bar', 1] + # csv << ['baz', 2] + # end + # output_string # => "foo,0\nbar,1\nbaz,2\n" + # + # --- + # + # Raises an exception if +csv_string+ is not a \String object: + # # Raises TypeError (no implicit conversion of Integer into String) + # CSV.generate(0) + # + def generate(str=nil, **options) + encoding = options[:encoding] + # add a default empty String, if none was given + if str + str = StringIO.new(str) + str.seek(0, IO::SEEK_END) + str.set_encoding(encoding) if encoding + else + str = +"" + str.force_encoding(encoding) if encoding end - - self # for chaining + csv = new(str, **options) # wrap + yield csv # yield for appending + csv.string # return final String end + # :call-seq: + # CSV.generate_line(ary) + # CSV.generate_line(ary, **options) # - # A shortcut for appending multiple fields.
Equivalent to: + # Returns the \String created by generating \CSV from +ary+ + # using the specified +options+. # - # args.each { |arg| csv_row << arg } + # Argument +ary+ must be an \Array. # - # This method returns the row for chaining. + # Special options: + # * Option <tt>:row_sep</tt> defaults to <tt>"\n"</tt> on Ruby 3.0 or later + # and <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>) otherwise: + # $INPUT_RECORD_SEPARATOR # => "\n" + # * This method accepts an additional option, <tt>:encoding</tt>, which sets the base + # Encoding for the output. This method will try to guess your Encoding from + # the first non-+nil+ field in +row+, if possible, but you may need to use + # this parameter as a backup plan. # - def push(*args) - args.each { |arg| self << arg } - - self # for chaining - end - + # For other +options+, + # see {Options for Generating}[#class-CSV-label-Options+for+Generating]. # - # :call-seq: - # delete( header ) - # delete( header, offset ) - # delete( index ) - # - # Used to remove a pair from the row by +header+ or +index+. The pair is - # located as described in CSV::Row.field(). The deleted pair is returned, - # or +nil+ if a pair could not be found. - # - def delete(header_or_index, minimum_index = 0) - if header_or_index.is_a? Integer # by index - @row.delete_at(header_or_index) - elsif i = index(header_or_index, minimum_index) # by header - @row.delete_at(i) - else - [ ] - end - end - + # --- # - # The provided +block+ is passed a header and field for each pair in the row - # and expected to return +true+ or +false+, depending on whether the pair - # should be deleted. + # Returns the \String generated from an \Array: + # CSV.generate_line(['foo', '0']) # => "foo,0\n" # - # This method returns the row for chaining.
+ # --- # - def delete_if(&block) - @row.delete_if(&block) - - self # for chaining - end - + # Raises an exception if +ary+ is not an \Array: + # # Raises NoMethodError (undefined method `find' for :foo:Symbol) + # CSV.generate_line(:foo) # - # This method accepts any number of arguments which can be headers, indices, - # Ranges of either, or two-element Arrays containing a header and offset. - # Each argument will be replaced with a field lookup as described in - # CSV::Row.field(). - # - # If called with no arguments, all fields are returned. - # - def fields(*headers_and_or_indices) - if headers_and_or_indices.empty? # return all fields--no arguments - @row.map { |pair| pair.last } - else # or work like values_at() - headers_and_or_indices.inject(Array.new) do |all, h_or_i| - all + if h_or_i.is_a? Range - index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin : - index(h_or_i.begin) - index_end = h_or_i.end.is_a?(Integer) ? h_or_i.end : - index(h_or_i.end) - new_range = h_or_i.exclude_end? ? (index_begin...index_end) : - (index_begin..index_end) - fields.values_at(new_range) - else - [field(*Array(h_or_i))] - end + def generate_line(row, **options) + options = {row_sep: InputRecordSeparator.value}.merge(options) + str = +"" + if options[:encoding] + str.force_encoding(options[:encoding]) + else + fallback_encoding = nil + output_encoding = nil + row.each do |field| + next unless field.is_a?(String) + fallback_encoding ||= field.encoding + next if field.ascii_only? + output_encoding = field.encoding + break + end + output_encoding ||= fallback_encoding + if output_encoding + str.force_encoding(output_encoding) end end + (new(str, **options) << row).string end - alias_method :values_at, :fields - # # :call-seq: - # index( header ) - # index( header, offset ) - # - # This method will return the index of a field with the provided +header+. - # The +offset+ can be used to locate duplicate header names, as described in - # CSV::Row.field(). 
- # - def index(header, minimum_index = 0) - # find the pair - index = headers[minimum_index..-1].index(header) - # return the index at the right offset, if we found one - index.nil? ? nil : index + minimum_index - end - - # Returns +true+ if +name+ is a header for this row, and +false+ otherwise. - def header?(name) - headers.include? name - end - alias_method :include?, :header? - + # CSV.generate_lines(rows) + # CSV.generate_lines(rows, **options) # - # Returns +true+ if +data+ matches a field in this row, and +false+ - # otherwise. + # Returns the \String created by generating \CSV from +rows+ + # using the specified +options+. # - def field?(data) - fields.include? data - end - - include Enumerable - + # Argument +rows+ must be an \Array of rows. Each row is an \Array of \Strings or a \CSV::Row. # - # Yields each pair of the row as header and field tuples (much like - # iterating over a Hash). + # Special options: + # * Option <tt>:row_sep</tt> defaults to <tt>"\n"</tt> on Ruby 3.0 or later + # and <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>) otherwise: + # $INPUT_RECORD_SEPARATOR # => "\n" + # * This method accepts an additional option, <tt>:encoding</tt>, which sets the base + # Encoding for the output. This method will try to guess your Encoding from + # the first non-+nil+ field in +row+, if possible, but you may need to use + # this parameter as a backup plan. # - # Support for Enumerable. + # For other +options+, + # see {Options for Generating}[#class-CSV-label-Options+for+Generating]. # - # This method returns the row for chaining. + # --- # - def each(&block) - @row.each(&block) - - self # for chaining - end - + # Returns the \String generated from an \Array of \Arrays: + # CSV.generate_lines([['foo', '0'], ['bar', '1'], ['baz', '2']]) # => "foo,0\nbar,1\nbaz,2\n" # - # Returns +true+ if this row contains the same headers and fields in the - # same order as +other+. + # --- # - def ==(other) - return @row == other.row if other.is_a?
CSV::Row - @row == other - end - - # - # Collapses the row into a simple Hash. Be warning that this discards field - # order and clobbers duplicate fields. + # Raises an exception + # # Raises NoMethodError (undefined method `find' for :foo:Symbol) + # CSV.generate_lines(:foo) # - def to_hash - # flatten just one level of the internal Array - Hash[*@row.inject(Array.new) { |ary, pair| ary.push(*pair) }] + def generate_lines(rows, **options) + self.generate(**options) do |csv| + rows.each do |row| + csv << row + end + end end # - # Returns the row as a CSV String. Headers are not used. Equivalent to: - # - # csv_row.fields.to_csv( options ) - # - def to_csv(options = Hash.new) - fields.to_csv(options) - end - alias_method :to_s, :to_csv - - # A summary of fields, by header, in an ASCII compatible String. - def inspect - str = ["#<", self.class.to_s] - each do |header, field| - str << " " << (header.is_a?(Symbol) ? header.to_s : header.inspect) << - ":" << field.inspect + # :call-seq: + # open(file_path, mode = "rb", **options ) -> new_csv + # open(io, mode = "rb", **options ) -> new_csv + # open(file_path, mode = "rb", **options ) { |csv| ... } -> object + # open(io, mode = "rb", **options ) { |csv| ... } -> object + # + # possible options elements: + # keyword form: + # :invalid => nil # raise error on invalid byte sequence (default) + # :invalid => :replace # replace invalid byte sequence + # :undef => :replace # replace undefined conversion + # :replace => string # replacement string ("?" or "\uFFFD" if not specified) + # + # * Argument +path+, if given, must be the path to a file. + # :include: ../doc/csv/arguments/io.rdoc + # * Argument +mode+, if given, must be a \File mode + # See {Open Mode}[IO.html#method-c-new-label-Open+Mode]. + # * Arguments <tt>**options</tt> must be keyword options. + # See {Options for Generating}[#class-CSV-label-Options+for+Generating]. 
+ # * This method optionally accepts an additional <tt>:encoding</tt> option + # that you can use to specify the Encoding of the data read from +path+ or +io+. + # You must provide this unless your data is in the encoding + # given by <tt>Encoding::default_external</tt>. + # Parsing will use this to determine how to parse the data. + # You may provide a second Encoding to + # have the data transcoded as it is read. For example, + # encoding: 'UTF-32BE:UTF-8' + # would read +UTF-32BE+ data from the file + # but transcode it to +UTF-8+ before parsing. + # + # --- + # + # These examples assume prior execution of: + # string = "foo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # + # --- + # + # With no block given, returns a new \CSV object. + # + # Create a \CSV object using a file path: + # csv = CSV.open(path) + # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\""> + # + # Create a \CSV object using an open \File: + # csv = CSV.open(File.open(path)) + # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\""> + # + # --- + # + # With a block given, calls the block with the created \CSV object; + # returns the block's return value: + # + # Using a file path: + # csv = CSV.open(path) {|csv| p csv} + # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\""> + # Output: + # #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\""> + # + # Using an open \File: + # csv = CSV.open(File.open(path)) {|csv| p csv} + # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\""> + # Output: + # #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\""> + # + # --- + # + # Raises an exception if the argument is not a \String object or \IO object: + # # Raises 
TypeError (no implicit conversion of Symbol into String) + # CSV.open(:foo) + def open(filename, mode="r", **options) + # wrap a File opened with the remaining +args+ with no newline + # decorator + file_opts = options.dup + unless file_opts.key?(:newline) + file_opts[:universal_newline] ||= false + end + options.delete(:invalid) + options.delete(:undef) + options.delete(:replace) + options.delete_if {|k, _| /newline\z/.match?(k)} + + begin + f = File.open(filename, mode, **file_opts) + rescue ArgumentError => e + raise unless /needs binmode/.match?(e.message) and mode == "r" + mode = "rb" + file_opts = {encoding: Encoding.default_external}.merge(file_opts) + retry end - str << ">" begin - str.join('') - rescue # any encoding error - str.map do |s| - e = Encoding::Converter.asciicompat_encoding(s.encoding) - e ? s.encode(e) : s.force_encoding("ASCII-8BIT") - end.join('') + csv = new(f, **options) + rescue Exception + f.close + raise + end + + # handle blocks like Ruby's open(), not like the CSV library + if block_given? + begin + yield csv + ensure + csv.close + end + else + csv end end - end - # - # A CSV::Table is a two-dimensional data structure for representing CSV - # documents. Tables allow you to work with the data by row or column, - # manipulate the data, and even convert the results back to CSV, if needed. - # - # All tables returned by CSV will be constructed from this class, if header - # row processing is activated. - # - class Table # - # Construct a new CSV::Table from +array_of_rows+, which are expected - # to be CSV::Row objects. All rows are assumed to have the same headers. + # :call-seq: + # parse(string) -> array_of_arrays + # parse(io) -> array_of_arrays + # parse(string, headers: ..., **options) -> csv_table + # parse(io, headers: ..., **options) -> csv_table + # parse(string, **options) {|row| ... } + # parse(io, **options) {|row| ... 
} # - # A CSV::Table object supports the following Array methods through - # delegation: + # Parses +string+ or +io+ using the specified +options+. # - # * empty?() - # * length() - # * size() + # - Argument +string+ should be a \String object; + # it will be put into a new StringIO object positioned at the beginning. + # :include: ../doc/csv/arguments/io.rdoc + # - Argument +options+: see {Options for Parsing}[#class-CSV-label-Options+for+Parsing] # - def initialize(array_of_rows) - @table = array_of_rows - @mode = :col_or_row - end - - # The current access mode for indexing and iteration. - attr_reader :mode - - # Internal data format used to compare equality. - attr_reader :table - protected :table - - ### Array Delegation ### - - extend Forwardable - def_delegators :@table, :empty?, :length, :size - + # ====== Without Option +headers+ # - # Returns a duplicate table object, in column mode. This is handy for - # chaining in a single call without changing the table mode, but be aware - # that this method can consume a fair amount of memory for bigger data sets. + # Without {option +headers+}[#class-CSV-label-Option+headers] case. # - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. + # These examples assume prior execution of: + # string = "foo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) # - def by_col - self.class.new(@table.dup).by_col! - end - + # --- # - # Switches the mode of this table to column mode. All calls to indexing and - # iteration methods will work with columns until the mode is changed again. + # With no block given, returns an \Array of Arrays formed from the source. # - # This method returns the table and is safe to chain. + # Parse a \String: + # a_of_a = CSV.parse(string) + # a_of_a # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] # - def by_col! 
- @mode = :col - - self - end - + # Parse an open \File: + # a_of_a = File.open(path) do |file| + # CSV.parse(file) + # end + # a_of_a # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] # - # Returns a duplicate table object, in mixed mode. This is handy for - # chaining in a single call without changing the table mode, but be aware - # that this method can consume a fair amount of memory for bigger data sets. + # --- # - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. + # With a block given, calls the block with each parsed row: # - def by_col_or_row - self.class.new(@table.dup).by_col_or_row! - end - + # Parse a \String: + # CSV.parse(string) {|row| p row } # - # Switches the mode of this table to mixed mode. All calls to indexing and - # iteration methods will use the default intelligent indexing system until - # the mode is changed again. In mixed mode an index is assumed to be a row - # reference while anything else is assumed to be column access by headers. + # Output: + # ["foo", "0"] + # ["bar", "1"] + # ["baz", "2"] # - # This method returns the table and is safe to chain. + # Parse an open \File: + # File.open(path) do |file| + # CSV.parse(file) {|row| p row } + # end # - def by_col_or_row! - @mode = :col_or_row - - self - end - + # Output: + # ["foo", "0"] + # ["bar", "1"] + # ["baz", "2"] # - # Returns a duplicate table object, in row mode. This is handy for chaining - # in a single call without changing the table mode, but be aware that this - # method can consume a fair amount of memory for bigger data sets. + # ====== With Option +headers+ # - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. + # With {option +headers+}[#class-CSV-label-Option+headers] case. # - def by_row - self.class.new(@table.dup).by_row! 
- end - + # These examples assume prior execution of: + # string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) # - # Switches the mode of this table to row mode. All calls to indexing and - # iteration methods will work with rows until the mode is changed again. + # --- # - # This method returns the table and is safe to chain. + # With no block given, returns a CSV::Table object formed from the source. # - def by_row! - @mode = :row - - self - end - + # Parse a \String: + # csv_table = CSV.parse(string, headers: ['Name', 'Count']) + # csv_table # => #<CSV::Table mode:col_or_row row_count:5> # - # Returns the headers for the first row of this table (assumed to match all - # other rows). An empty Array is returned for empty tables. + # Parse an open \File: + # csv_table = File.open(path) do |file| + # CSV.parse(file, headers: ['Name', 'Count']) + # end + # csv_table # => #<CSV::Table mode:col_or_row row_count:4> # - def headers - if @table.empty? - Array.new - else - @table.first.headers - end - end - - # - # In the default mixed mode, this method returns rows for index access and - # columns for header access. You can force the index association by first - # calling by_col!() or by_row!(). + # --- # - # Columns are returned as an Array of values. Altering that Array has no - # effect on the table. + # With a block given, calls the block with each parsed row, + # which has been formed into a CSV::Row object: # - def [](index_or_header) - if @mode == :row or # by index - (@mode == :col_or_row and index_or_header.is_a? Integer) - @table[index_or_header] - else # by header - @table.map { |row| row[index_or_header] } - end - end - + # Parse a \String: + # CSV.parse(string, headers: ['Name', 'Count']) {|row| p row } # - # In the default mixed mode, this method assigns rows for index access and - # columns for header access. You can force the index association by first - # calling by_col!() or by_row!(). 
+ # Output: + # # <CSV::Row "Name":"foo" "Count":"0"> + # # <CSV::Row "Name":"bar" "Count":"1"> + # # <CSV::Row "Name":"baz" "Count":"2"> # - # Rows may be set to an Array of values (which will inherit the table's - # headers()) or a CSV::Row. + # Parse an open \File: + # File.open(path) do |file| + # CSV.parse(file, headers: ['Name', 'Count']) {|row| p row } + # end # - # Columns may be set to a single value, which is copied to each row of the - # column, or an Array of values. Arrays of values are assigned to rows top - # to bottom in row major order. Excess values are ignored and if the Array - # does not have a value for each row the extra rows will receive a +nil+. + # Output: + # # <CSV::Row "Name":"foo" "Count":"0"> + # # <CSV::Row "Name":"bar" "Count":"1"> + # # <CSV::Row "Name":"baz" "Count":"2"> # - # Assigning to an existing column or row clobbers the data. Assigning to - # new columns creates them at the right end of the table. + # --- # - def []=(index_or_header, value) - if @mode == :row or # by index - (@mode == :col_or_row and index_or_header.is_a? Integer) - if value.is_a? Array - @table[index_or_header] = Row.new(headers, value) - else - @table[index_or_header] = value - end - else # set column - if value.is_a? Array # multiple values - @table.each_with_index do |row, i| - if row.header_row? - row[index_or_header] = index_or_header - else - row[index_or_header] = value[i] - end - end - else # repeated value - @table.each do |row| - if row.header_row? - row[index_or_header] = index_or_header - else - row[index_or_header] = value - end - end - end - end - end + # Raises an exception if the argument is not a \String object or \IO object: + # # Raises NoMethodError (undefined method `close' for :foo:Symbol) + # CSV.parse(:foo) + def parse(str, **options, &block) + csv = new(str, **options) - # - # The mixed mode default is to treat a list of indices as row access, - # returning the rows indicated. Anything else is considered columnar - # access. 
For columnar access, the return set has an Array for each row - # with the values indicated by the headers in each Array. You can force - # column or row mode using by_col!() or by_row!(). - # - # You cannot mix column and row access. - # - def values_at(*indices_or_headers) - if @mode == :row or # by indices - ( @mode == :col_or_row and indices_or_headers.all? do |index| - index.is_a?(Integer) or - ( index.is_a?(Range) and - index.first.is_a?(Integer) and - index.last.is_a?(Integer) ) - end ) - @table.values_at(*indices_or_headers) - else # by headers - @table.map { |row| row.values_at(*indices_or_headers) } + return csv.each(&block) if block_given? + + # slurp contents, if no block is given + begin + csv.read + ensure + csv.close end end + # :call-seq: + # CSV.parse_line(string) -> new_array or nil + # CSV.parse_line(io) -> new_array or nil + # CSV.parse_line(string, **options) -> new_array or nil + # CSV.parse_line(io, **options) -> new_array or nil + # CSV.parse_line(string, headers: true, **options) -> csv_row or nil + # CSV.parse_line(io, headers: true, **options) -> csv_row or nil # - # Adds a new row to the bottom end of this table. You can provide an Array, - # which will be converted to a CSV::Row (inheriting the table's headers()), - # or a CSV::Row. - # - # This method returns the table for chaining. + # Returns the data created by parsing the first line of +string+ or +io+ + # using the specified +options+. # - def <<(row_or_array) - if row_or_array.is_a? Array # append Array - @table << Row.new(headers, row_or_array) - else # append Row - @table << row_or_array - end - - self # for chaining - end - + # - Argument +string+ should be a \String object; + # it will be put into a new StringIO object positioned at the beginning. + # :include: ../doc/csv/arguments/io.rdoc + # - Argument +options+: see {Options for Parsing}[#class-CSV-label-Options+for+Parsing] # - # A shortcut for appending multiple rows. 
Equivalent to: + # ====== Without Option +headers+ # - # rows.each { |row| self << row } + # Without option +headers+, returns the first row as a new \Array. # - # This method returns the table for chaining. + # These examples assume prior execution of: + # string = "foo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) # - def push(*rows) - rows.each { |row| self << row } - - self # for chaining - end - + # Parse the first line from a \String object: + # CSV.parse_line(string) # => ["foo", "0"] # - # Removes and returns the indicated column or row. In the default mixed - # mode indices refer to rows and everything else is assumed to be a column - # header. Use by_col!() or by_row!() to force the lookup. + # Parse the first line from a File object: + # File.open(path) do |file| + # CSV.parse_line(file) # => ["foo", "0"] + # end # => ["foo", "0"] # - def delete(index_or_header) - if @mode == :row or # by index - (@mode == :col_or_row and index_or_header.is_a? Integer) - @table.delete_at(index_or_header) - else # by header - @table.map { |row| row.delete(index_or_header).last } - end - end - + # Returns +nil+ if the argument is an empty \String: + # CSV.parse_line('') # => nil # - # Removes any column or row for which the block returns +true+. In the - # default mixed mode or row mode, iteration is the standard row major - # walking of rows. In column mode, iteration will +yield+ two element - # tuples containing the column name and an Array of values for that column. + # ====== With Option +headers+ # - # This method returns the table for chaining. + # With {option +headers+}[#class-CSV-label-Option+headers], + # returns the first row as a CSV::Row object. 
# - def delete_if(&block) - if @mode == :row or @mode == :col_or_row # by index - @table.delete_if(&block) - else # by header - to_delete = Array.new - headers.each_with_index do |header, i| - to_delete << header if block[[header, self[header]]] - end - to_delete.map { |header| delete(header) } - end - - self # for chaining - end - - include Enumerable - + # These examples assume prior execution of: + # string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) # - # In the default mixed mode or row mode, iteration is the standard row major - # walking of rows. In column mode, iteration will +yield+ two element - # tuples containing the column name and an Array of values for that column. + # Parse the first line from a \String object: + # CSV.parse_line(string, headers: true) # => #<CSV::Row "Name":"foo" "Count":"0"> # - # This method returns the table for chaining. + # Parse the first line from a File object: + # File.open(path) do |file| + # CSV.parse_line(file, headers: true) + # end # => #<CSV::Row "Name":"foo" "Count":"0"> # - def each(&block) - if @mode == :col - headers.each { |header| block[[header, self[header]]] } - else - @table.each(&block) - end - - self # for chaining - end - - # Returns +true+ if all rows of this table ==() +other+'s rows. - def ==(other) - @table == other.table - end - + # --- # - # Returns the table as an Array of Arrays. Headers will be the first row, - # then all of the field rows will follow. + # Raises an exception if the argument is +nil+: + # # Raises ArgumentError (Cannot parse nil as CSV): + # CSV.parse_line(nil) # - def to_a - @table.inject([headers]) do |array, row| - if row.header_row? - array - else - array + [row.fields] - end - end + def parse_line(line, **options) + new(line, **options).each.first end # - # Returns the table as a complete CSV String. Headers will be listed first, - # then all of the field rows. 
+ # :call-seq: + # read(source, **options) -> array_of_arrays + # read(source, headers: true, **options) -> csv_table # - # This method assumes you want the Table.headers(), unless you explicitly - # pass <tt>:write_headers => false</tt>. + # Opens the given +source+ with the given +options+ (see CSV.open), + # reads the source (see CSV#read), and returns the result, + # which will be either an \Array of Arrays or a CSV::Table. # - def to_csv(options = Hash.new) - wh = options.fetch(:write_headers, true) - @table.inject(wh ? [headers.to_csv(options)] : [ ]) do |rows, row| - if row.header_row? - rows - else - rows + [row.fields.to_csv(options)] - end - end.join('') + # Without headers: + # string = "foo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # CSV.read(path) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] + # + # With headers: + # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # CSV.read(path, headers: true) # => #<CSV::Table mode:col_or_row row_count:4> + def read(path, **options) + open(path, **options) { |csv| csv.read } end - alias_method :to_s, :to_csv - # Shows the mode and size of this table in a US-ASCII String. - def inspect - "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII") + # :call-seq: + # CSV.readlines(source, **options) + # + # Alias for CSV.read. + def readlines(path, **options) + read(path, **options) end - end - - # The error thrown when the parser encounters illegal CSV formatting. - class MalformedCSVError < RuntimeError; end - - # - # A FieldInfo Struct contains details about a field's position in the data - # source it was read from. CSV will pass this Struct to some blocks that make - # decisions based on field structure. See CSV.convert_fields() for an - # example. - # - # <b><tt>index</tt></b>:: The zero-based index of the field in its row. - # <b><tt>line</tt></b>:: The line of the data source this row is from. 
- # <b><tt>header</tt></b>:: The header for the column, when available. - # - FieldInfo = Struct.new(:index, :line, :header) - - # A Regexp used to find and convert some common Date formats. - DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} | - \d{4}-\d{2}-\d{2} )\z /x - # A Regexp used to find and convert some common DateTime formats. - DateTimeMatcher = - / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} | - \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x - # The encoding used by all converters. - ConverterEncoding = Encoding.find("UTF-8") - - # - # This Hash holds the built-in converters of CSV that can be accessed by name. - # You can select Converters with CSV.convert() or through the +options+ Hash - # passed to CSV::new(). - # - # <b><tt>:integer</tt></b>:: Converts any field Integer() accepts. - # <b><tt>:float</tt></b>:: Converts any field Float() accepts. - # <b><tt>:numeric</tt></b>:: A combination of <tt>:integer</tt> - # and <tt>:float</tt>. - # <b><tt>:date</tt></b>:: Converts any field Date::parse() accepts. - # <b><tt>:date_time</tt></b>:: Converts any field DateTime::parse() accepts. - # <b><tt>:all</tt></b>:: All built-in converters. A combination of - # <tt>:date_time</tt> and <tt>:numeric</tt>. - # - # All built-in converters transcode field data to UTF-8 before attempting a - # conversion. If your data cannot be transcoded to UTF-8 the conversion will - # fail and the field will remain unchanged. - # - # This Hash is intentionally left unfrozen and users should feel free to add - # values to it that can be accessed by all CSV objects. - # - # To add a combo field, the value should be an Array of names. Combo fields - # can be nested with other combo fields. 
- # - Converters = { integer: lambda { |f| - Integer(f.encode(ConverterEncoding)) rescue f - }, - float: lambda { |f| - Float(f.encode(ConverterEncoding)) rescue f - }, - numeric: [:integer, :float], - date: lambda { |f| - begin - e = f.encode(ConverterEncoding) - e =~ DateMatcher ? Date.parse(e) : f - rescue # encoding conversion or date parse errors - f - end - }, - date_time: lambda { |f| - begin - e = f.encode(ConverterEncoding) - e =~ DateTimeMatcher ? DateTime.parse(e) : f - rescue # encoding conversion or date parse errors - f - end - }, - all: [:date_time, :numeric] } - - # - # This Hash holds the built-in header converters of CSV that can be accessed - # by name. You can select HeaderConverters with CSV.header_convert() or - # through the +options+ Hash passed to CSV::new(). - # - # <b><tt>:downcase</tt></b>:: Calls downcase() on the header String. - # <b><tt>:symbol</tt></b>:: The header String is downcased, spaces are - # replaced with underscores, non-word characters - # are dropped, and finally to_sym() is called. - # - # All built-in header converters transcode header data to UTF-8 before - # attempting a conversion. If your data cannot be transcoded to UTF-8 the - # conversion will fail and the header will remain unchanged. - # - # This Hash is intentionally left unfrozen and users should feel free to add - # values to it that can be accessed by all CSV objects. - # - # To add a combo field, the value should be an Array of names. Combo fields - # can be nested with other combo fields. - # - HeaderConverters = { - downcase: lambda { |h| h.encode(ConverterEncoding).downcase }, - symbol: lambda { |h| - h.encode(ConverterEncoding).downcase.strip.gsub(/\s+/, "_"). - gsub(/\W+/, "").to_sym - } - } - - # - # The options used when no overrides are given by calling code. 
They are: - # - # <b><tt>:col_sep</tt></b>:: <tt>","</tt> - # <b><tt>:row_sep</tt></b>:: <tt>:auto</tt> - # <b><tt>:quote_char</tt></b>:: <tt>'"'</tt> - # <b><tt>:field_size_limit</tt></b>:: +nil+ - # <b><tt>:converters</tt></b>:: +nil+ - # <b><tt>:unconverted_fields</tt></b>:: +nil+ - # <b><tt>:headers</tt></b>:: +false+ - # <b><tt>:return_headers</tt></b>:: +false+ - # <b><tt>:header_converters</tt></b>:: +nil+ - # <b><tt>:skip_blanks</tt></b>:: +false+ - # <b><tt>:force_quotes</tt></b>:: +false+ - # <b><tt>:skip_lines</tt></b>:: +nil+ - # - DEFAULT_OPTIONS = { col_sep: ",", - row_sep: :auto, - quote_char: '"', - field_size_limit: nil, - converters: nil, - unconverted_fields: nil, - headers: false, - return_headers: false, - header_converters: nil, - skip_blanks: false, - force_quotes: false, - skip_lines: nil }.freeze - - # - # This method will return a CSV instance, just like CSV::new(), but the - # instance will be cached and returned for all future calls to this method for - # the same +data+ object (tested by Object#object_id()) with the same - # +options+. - # - # If a block is given, the instance is passed to the block and the return - # value becomes the return value of the block. - # - def self.instance(data = $stdout, options = Hash.new) - # create a _signature_ for this method call, data object and options - sig = [data.object_id] + - options.values_at(*DEFAULT_OPTIONS.keys.sort_by { |sym| sym.to_s }) - - # fetch or create the instance for this signature - @@instances ||= Hash.new - instance = (@@instances[sig] ||= new(data, options)) - - if block_given? - yield instance # run block, if given, returning result - else - instance # or return the instance + # :call-seq: + # CSV.table(source, **options) + # + # Calls CSV.read with +source+, +options+, and certain default options: + # - +headers+: +true+ + # - +converters+: +:numeric+ + # - +header_converters+: +:symbol+ + # + # Returns a CSV::Table object. 
+ # + # Example: + # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # CSV.table(path) # => #<CSV::Table mode:col_or_row row_count:4> + def table(path, **options) + default_options = { + headers: true, + converters: :numeric, + header_converters: :symbol, + } + options = default_options.merge(options) + read(path, **options) end end - # # :call-seq: - # filter( options = Hash.new ) { |row| ... } - # filter( input, options = Hash.new ) { |row| ... } - # filter( input, output, options = Hash.new ) { |row| ... } - # - # This method is a convenience for building Unix-like filters for CSV data. - # Each row is yielded to the provided block which can alter it as needed. - # After the block returns, the row is appended to +output+ altered or not. - # - # The +input+ and +output+ arguments can be anything CSV::new() accepts - # (generally String or IO objects). If not given, they default to - # <tt>ARGF</tt> and <tt>$stdout</tt>. - # - # The +options+ parameter is also filtered down to CSV::new() after some - # clever key parsing. Any key beginning with <tt>:in_</tt> or - # <tt>:input_</tt> will have that leading identifier stripped and will only - # be used in the +options+ Hash for the +input+ object. Keys starting with - # <tt>:out_</tt> or <tt>:output_</tt> affect only +output+. All other keys - # are assigned to both objects. - # - # The <tt>:output_row_sep</tt> +option+ defaults to - # <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>). - # - def self.filter(*args) - # parse options for input, output, or both - in_options, out_options = Hash.new, {row_sep: $INPUT_RECORD_SEPARATOR} - if args.last.is_a? 
Hash - args.pop.each do |key, value| - case key.to_s - when /\Ain(?:put)?_(.+)\Z/ - in_options[$1.to_sym] = value - when /\Aout(?:put)?_(.+)\Z/ - out_options[$1.to_sym] = value + # CSV.new(string) + # CSV.new(io) + # CSV.new(string, **options) + # CSV.new(io, **options) + # + # Returns the new \CSV object created using +string+ or +io+ + # and the specified +options+. + # + # - Argument +string+ should be a \String object; + # it will be put into a new StringIO object positioned at the beginning. + # :include: ../doc/csv/arguments/io.rdoc + # - Argument +options+: See: + # * {Options for Parsing}[#class-CSV-label-Options+for+Parsing] + # * {Options for Generating}[#class-CSV-label-Options+for+Generating] + # For performance reasons, the options cannot be overridden + # in a \CSV object, so those specified here will endure. + # + # In addition to the \CSV instance methods, several \IO methods are delegated. + # See {Delegated Methods}[#class-CSV-label-Delegated+Methods]. + # + # --- + # + # Create a \CSV object from a \String object: + # csv = CSV.new('foo,0') + # csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\""> + # + # Create a \CSV object from a \File object: + # File.write('t.csv', 'foo,0') + # csv = CSV.new(File.open('t.csv')) + # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\""> + # + # --- + # + # Raises an exception if the argument is +nil+: + # # Raises ArgumentError (Cannot parse nil as CSV): + # CSV.new(nil) + # + def initialize(data, + col_sep: ",", + row_sep: :auto, + quote_char: '"', + field_size_limit: nil, + max_field_size: nil, + converters: nil, + unconverted_fields: nil, + headers: false, + return_headers: false, + write_headers: nil, + header_converters: nil, + skip_blanks: false, + force_quotes: false, + skip_lines: nil, + liberal_parsing: false, + internal_encoding: nil, + external_encoding: nil, + encoding: nil, + nil_value: nil, + 
empty_value: "", + strip: false, + quote_empty: true, + write_converters: nil, + write_nil_value: nil, + write_empty_value: "") + raise ArgumentError.new("Cannot parse nil as CSV") if data.nil? + + if data.is_a?(String) + if encoding + if encoding.is_a?(String) + data_external_encoding, data_internal_encoding = encoding.split(":", 2) + if data_internal_encoding + data = data.encode(data_internal_encoding, data_external_encoding) + else + data = data.dup.force_encoding(data_external_encoding) + end else - in_options[key] = value - out_options[key] = value + data = data.dup.force_encoding(encoding) end end + @io = StringIO.new(data) + else + @io = data end - # build input and output wrappers - input = new(args.shift || ARGF, in_options) - output = new(args.shift || $stdout, out_options) - - # read, yield, write - input.each do |row| - yield row - output << row - end + @encoding = determine_encoding(encoding, internal_encoding) + + @base_fields_converter_options = { + nil_value: nil_value, + empty_value: empty_value, + } + @write_fields_converter_options = { + nil_value: write_nil_value, + empty_value: write_empty_value, + } + @initial_converters = converters + @initial_header_converters = header_converters + @initial_write_converters = write_converters + + if max_field_size.nil? 
and field_size_limit + max_field_size = field_size_limit - 1 + end + @parser_options = { + column_separator: col_sep, + row_separator: row_sep, + quote_character: quote_char, + max_field_size: max_field_size, + unconverted_fields: unconverted_fields, + headers: headers, + return_headers: return_headers, + skip_blanks: skip_blanks, + skip_lines: skip_lines, + liberal_parsing: liberal_parsing, + encoding: @encoding, + nil_value: nil_value, + empty_value: empty_value, + strip: strip, + } + @parser = nil + @parser_enumerator = nil + @eof_error = nil + + @writer_options = { + encoding: @encoding, + force_encoding: (not encoding.nil?), + force_quotes: force_quotes, + headers: headers, + write_headers: write_headers, + column_separator: col_sep, + row_separator: row_sep, + quote_character: quote_char, + quote_empty: quote_empty, + } + + @writer = nil + writer if @writer_options[:write_headers] end + # :call-seq: + # csv.col_sep -> string # - # This method is intended as the primary interface for reading CSV files. You - # pass a +path+ and any +options+ you wish to set for the read. Each row of - # file will be passed to the provided +block+ in turn. - # - # The +options+ parameter can be anything CSV::new() understands. This method - # also understands an additional <tt>:encoding</tt> parameter that you can use - # to specify the Encoding of the data in the file to be read. You must provide - # this unless your data is in Encoding::default_external(). CSV will use this - # to determine how to parse the data. You may provide a second Encoding to - # have the data transcoded as it is read. For example, - # <tt>encoding: "UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file - # but transcode it to UTF-8 before CSV parses it. 
- # - def self.foreach(path, options = Hash.new, &block) - return to_enum(__method__, path, options) unless block - open(path, options) do |csv| - csv.each(&block) - end + # Returns the encoded column separator; used for parsing and writing; + # see {Option +col_sep+}[#class-CSV-label-Option+col_sep]: + # CSV.new('').col_sep # => "," + def col_sep + parser.column_separator end + # :call-seq: + # csv.row_sep -> string # + # Returns the encoded row separator; used for parsing and writing; + # see {Option +row_sep+}[#class-CSV-label-Option+row_sep]: + # CSV.new('').row_sep # => "\n" + def row_sep + parser.row_separator + end + # :call-seq: - # generate( str, options = Hash.new ) { |csv| ... } - # generate( options = Hash.new ) { |csv| ... } - # - # This method wraps a String you provide, or an empty default String, in a - # CSV object which is passed to the provided block. You can use the block to - # append CSV rows to the String and when the block exits, the final String - # will be returned. - # - # Note that a passed String *is* modified by this method. Call dup() before - # passing if you need a new String. - # - # The +options+ parameter can be anything CSV::new() understands. This method - # understands an additional <tt>:encoding</tt> parameter when not passed a - # String to set the base Encoding for the output. CSV needs this hint if you - # plan to output non-ASCII compatible data. - # - def self.generate(*args) - # add a default empty String, if none was given - if args.first.is_a? 
String - io = StringIO.new(args.shift) - io.seek(0, IO::SEEK_END) - args.unshift(io) - else - encoding = args[-1][:encoding] if args.last.is_a?(Hash) - str = "" - str.force_encoding(encoding) if encoding - args.unshift(str) - end - csv = new(*args) # wrap - yield csv # yield for appending - csv.string # return final String + # csv.quote_char -> character + # + # Returns the encoded quote character; used for parsing and writing; + # see {Option +quote_char+}[#class-CSV-label-Option+quote_char]: + # CSV.new('').quote_char # => "\"" + def quote_char + parser.quote_character end + # :call-seq: + # csv.field_size_limit -> integer or nil # - # This method is a shortcut for converting a single row (Array) into a CSV - # String. + # Returns the limit for field size; used for parsing; + # see {Option +field_size_limit+}[#class-CSV-label-Option+field_size_limit]: + # CSV.new('').field_size_limit # => nil # - # The +options+ parameter can be anything CSV::new() understands. This method - # understands an additional <tt>:encoding</tt> parameter to set the base - # Encoding for the output. This method will try to guess your Encoding from - # the first non-+nil+ field in +row+, if possible, but you may need to use - # this parameter as a backup plan. + # Deprecated since 3.2.3. Use +max_field_size+ instead. + def field_size_limit + parser.field_size_limit + end + + # :call-seq: + # csv.max_field_size -> integer or nil # - # The <tt>:row_sep</tt> +option+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt> - # (<tt>$/</tt>) when calling this method. + # Returns the limit for field size; used for parsing; + # see {Option +max_field_size+}[#class-CSV-label-Option+max_field_size]: + # CSV.new('').max_field_size # => nil # - def self.generate_line(row, options = Hash.new) - options = {row_sep: $INPUT_RECORD_SEPARATOR}.merge(options) - encoding = options.delete(:encoding) - str = "" - if encoding - str.force_encoding(encoding) - elsif field = row.find { |f| not f.nil? 
} - str.force_encoding(String(field).encoding) - end - (new(str, options) << row).string + # Since 3.2.3. + def max_field_size + parser.max_field_size end + # :call-seq: + # csv.skip_lines -> regexp or nil # + # Returns the \Regexp used to identify comment lines; used for parsing; + # see {Option +skip_lines+}[#class-CSV-label-Option+skip_lines]: + # CSV.new('').skip_lines # => nil + def skip_lines + parser.skip_lines + end + # :call-seq: - # open( filename, mode = "rb", options = Hash.new ) { |faster_csv| ... } - # open( filename, options = Hash.new ) { |faster_csv| ... } - # open( filename, mode = "rb", options = Hash.new ) - # open( filename, options = Hash.new ) - # - # This method opens an IO object, and wraps that with CSV. This is intended - # as the primary interface for writing a CSV file. - # - # You must pass a +filename+ and may optionally add a +mode+ for Ruby's - # open(). You may also pass an optional Hash containing any +options+ - # CSV::new() understands as the final argument. - # - # This method works like Ruby's open() call, in that it will pass a CSV object - # to a provided block and close it when the block terminates, or it will - # return the CSV object when no block is provided. (*Note*: This is different - # from the Ruby 1.8 CSV library which passed rows to the block. Use - # CSV::foreach() for that behavior.) - # - # You must provide a +mode+ with an embedded Encoding designator unless your - # data is in Encoding::default_external(). CSV will check the Encoding of the - # underlying IO object (set by the +mode+ you pass) to determine how to parse - # the data. You may provide a second Encoding to have the data transcoded as - # it is read just as you can with a normal call to IO::open(). For example, - # <tt>"rb:UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file but - # transcode it to UTF-8 before CSV parses it. - # - # An opened CSV object will delegate to many IO methods for convenience. 
You - # may call: - # - # * binmode() - # * binmode?() - # * close() - # * close_read() - # * close_write() - # * closed?() - # * eof() - # * eof?() - # * external_encoding() - # * fcntl() - # * fileno() - # * flock() - # * flush() - # * fsync() - # * internal_encoding() - # * ioctl() - # * isatty() - # * path() - # * pid() - # * pos() - # * pos=() - # * reopen() - # * seek() - # * stat() - # * sync() - # * sync=() - # * tell() - # * to_i() - # * to_io() - # * truncate() - # * tty?() - # - def self.open(*args) - # find the +options+ Hash - options = if args.last.is_a? Hash then args.pop else Hash.new end - # wrap a File opened with the remaining +args+ with no newline - # decorator - file_opts = {universal_newline: false}.merge(options) - begin - f = File.open(*args, file_opts) - rescue ArgumentError => e - raise unless /needs binmode/ =~ e.message and args.size == 1 - args << "rb" - file_opts = {encoding: Encoding.default_external}.merge(file_opts) - retry - end - begin - csv = new(f, options) - rescue Exception - f.close - raise + # csv.converters -> array + # + # Returns an \Array containing field converters; + # see {Field Converters}[#class-CSV-label-Field+Converters]: + # csv = CSV.new('') + # csv.converters # => [] + # csv.convert(:integer) + # csv.converters # => [:integer] + # csv.convert(proc {|x| x.to_s }) + # csv.converters + # + # Note that you need to call + # +Ractor.make_shareable(CSV::Converters)+ on the main Ractor to use + # this method. + def converters + parser_fields_converter.map do |converter| + name = Converters.rassoc(converter) + name ? name.first : converter end + end - # handle blocks like Ruby's open(), not like the CSV library - if block_given? - begin - yield csv - ensure - csv.close - end + # :call-seq: + # csv.unconverted_fields? 
-> object + # + # Returns the value that determines whether unconverted fields are to be + # available; used for parsing; + # see {Option +unconverted_fields+}[#class-CSV-label-Option+unconverted_fields]: + # CSV.new('').unconverted_fields? # => nil + def unconverted_fields? + parser.unconverted_fields? + end + + # :call-seq: + # csv.headers -> object + # + # Returns the value that determines whether headers are used; used for parsing; + # see {Option +headers+}[#class-CSV-label-Option+headers]: + # CSV.new('').headers # => nil + def headers + if @writer + @writer.headers else - csv + parsed_headers = parser.headers + return parsed_headers if parsed_headers + raw_headers = @parser_options[:headers] + raw_headers = nil if raw_headers == false + raw_headers end end - # # :call-seq: - # parse( str, options = Hash.new ) { |row| ... } - # parse( str, options = Hash.new ) + # csv.return_headers? -> true or false # - # This method can be used to easily parse CSV out of a String. You may either - # provide a +block+ which will be called with each row of the String in turn, - # or just use the returned Array of Arrays (when no +block+ is given). - # - # You pass your +str+ to read from, and an optional +options+ Hash containing - # anything CSV::new() understands. - # - def self.parse(*args, &block) - csv = new(*args) - if block.nil? # slurp contents, if no block is given - begin - csv.read - ensure - csv.close - end - else # or pass each row to a provided block - csv.each(&block) - end + # Returns the value that determines whether headers are to be returned; used for parsing; + # see {Option +return_headers+}[#class-CSV-label-Option+return_headers]: + # CSV.new('').return_headers? # => false + def return_headers? + parser.return_headers? end + # :call-seq: + # csv.write_headers? -> true or false # - # This method is a shortcut for converting a single line of a CSV String into - # an Array. 
Note that if +line+ contains multiple rows, anything beyond the - # first row is ignored. + # Returns the value that determines whether headers are to be written; used for generating; + # see {Option +write_headers+}[#class-CSV-label-Option+write_headers]: + # CSV.new('').write_headers? # => nil + def write_headers? + @writer_options[:write_headers] + end + + # :call-seq: + # csv.header_converters -> array # - # The +options+ parameter can be anything CSV::new() understands. + # Returns an \Array containing header converters; used for parsing; + # see {Header Converters}[#class-CSV-label-Header+Converters]: + # CSV.new('').header_converters # => [] # - def self.parse_line(line, options = Hash.new) - new(line, options).shift + # Note that you need to call + # +Ractor.make_shareable(CSV::HeaderConverters)+ on the main Ractor + # to use this method. + def header_converters + header_fields_converter.map do |converter| + name = HeaderConverters.rassoc(converter) + name ? name.first : converter + end end + # :call-seq: + # csv.skip_blanks? -> true or false # - # Use to slurp a CSV file into an Array of Arrays. Pass the +path+ to the - # file and any +options+ CSV::new() understands. This method also understands - # an additional <tt>:encoding</tt> parameter that you can use to specify the - # Encoding of the data in the file to be read. You must provide this unless - # your data is in Encoding::default_external(). CSV will use this to determine - # how to parse the data. You may provide a second Encoding to have the data - # transcoded as it is read. For example, - # <tt>encoding: "UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file - # but transcode it to UTF-8 before CSV parses it. - # - def self.read(path, *options) - open(path, *options) { |csv| csv.read } + # Returns the value that determines whether blank lines are to be ignored; used for parsing; + # see {Option +skip_blanks+}[#class-CSV-label-Option+skip_blanks]: + # CSV.new('').skip_blanks? 
# => false + def skip_blanks? + parser.skip_blanks? end - # Alias for CSV::read(). - def self.readlines(*args) - read(*args) + # :call-seq: + # csv.force_quotes? -> true or false + # + # Returns the value that determines whether all output fields are to be quoted; + # used for generating; + # see {Option +force_quotes+}[#class-CSV-label-Option+force_quotes]: + # CSV.new('').force_quotes? # => false + def force_quotes? + @writer_options[:force_quotes] end + # :call-seq: + # csv.liberal_parsing? -> true or false # - # A shortcut for: - # - # CSV.read( path, { headers: true, - # converters: :numeric, - # header_converters: :symbol }.merge(options) ) - # - def self.table(path, options = Hash.new) - read( path, { headers: true, - converters: :numeric, - header_converters: :symbol }.merge(options) ) + # Returns the value that determines whether illegal input is to be handled; used for parsing; + # see {Option +liberal_parsing+}[#class-CSV-label-Option+liberal_parsing]: + # CSV.new('').liberal_parsing? # => false + def liberal_parsing? + parser.liberal_parsing? end + # :call-seq: + # csv.encoding -> encoding # - # This constructor will wrap either a String or IO object passed in +data+ for - # reading and/or writing. In addition to the CSV instance methods, several IO - # methods are delegated. (See CSV::open() for a complete list.) If you pass - # a String for +data+, you can later retrieve it (after writing to it, for - # example) with CSV.string(). - # - # Note that a wrapped String will be positioned at at the beginning (for - # reading). If you want it at the end (for writing), use CSV::generate(). - # If you want any other positioning, pass a preset StringIO object instead. - # - # You may set any reading and/or writing preferences in the +options+ Hash. - # Available options are: - # - # <b><tt>:col_sep</tt></b>:: The String placed between each field. - # This String will be transcoded into - # the data's Encoding before parsing. 
- # <b><tt>:row_sep</tt></b>:: The String appended to the end of each - # row. This can be set to the special - # <tt>:auto</tt> setting, which requests - # that CSV automatically discover this - # from the data. Auto-discovery reads - # ahead in the data looking for the next - # <tt>"\r\n"</tt>, <tt>"\n"</tt>, or - # <tt>"\r"</tt> sequence. A sequence - # will be selected even if it occurs in - # a quoted field, assuming that you - # would have the same line endings - # there. If none of those sequences is - # found, +data+ is <tt>ARGF</tt>, - # <tt>STDIN</tt>, <tt>STDOUT</tt>, or - # <tt>STDERR</tt>, or the stream is only - # available for output, the default - # <tt>$INPUT_RECORD_SEPARATOR</tt> - # (<tt>$/</tt>) is used. Obviously, - # discovery takes a little time. Set - # manually if speed is important. Also - # note that IO objects should be opened - # in binary mode on Windows if this - # feature will be used as the - # line-ending translation can cause - # problems with resetting the document - # position to where it was before the - # read ahead. This String will be - # transcoded into the data's Encoding - # before parsing. - # <b><tt>:quote_char</tt></b>:: The character used to quote fields. - # This has to be a single character - # String. This is useful for - # application that incorrectly use - # <tt>'</tt> as the quote character - # instead of the correct <tt>"</tt>. - # CSV will always consider a double - # sequence of this character to be an - # escaped quote. This String will be - # transcoded into the data's Encoding - # before parsing. - # <b><tt>:field_size_limit</tt></b>:: This is a maximum size CSV will read - # ahead looking for the closing quote - # for a field. (In truth, it reads to - # the first line ending beyond this - # size.) If a quote cannot be found - # within the limit CSV will raise a - # MalformedCSVError, assuming the data - # is faulty. You can use this limit to - # prevent what are effectively DoS - # attacks on the parser. 
However, this - # limit can cause a legitimate parse to - # fail and thus is set to +nil+, or off, - # by default. - # <b><tt>:converters</tt></b>:: An Array of names from the Converters - # Hash and/or lambdas that handle custom - # conversion. A single converter - # doesn't have to be in an Array. All - # built-in converters try to transcode - # fields to UTF-8 before converting. - # The conversion will fail if the data - # cannot be transcoded, leaving the - # field unchanged. - # <b><tt>:unconverted_fields</tt></b>:: If set to +true+, an - # unconverted_fields() method will be - # added to all returned rows (Array or - # CSV::Row) that will return the fields - # as they were before conversion. Note - # that <tt>:headers</tt> supplied by - # Array or String were not fields of the - # document and thus will have an empty - # Array attached. - # <b><tt>:headers</tt></b>:: If set to <tt>:first_row</tt> or - # +true+, the initial row of the CSV - # file will be treated as a row of - # headers. If set to an Array, the - # contents will be used as the headers. - # If set to a String, the String is run - # through a call of CSV::parse_line() - # with the same <tt>:col_sep</tt>, - # <tt>:row_sep</tt>, and - # <tt>:quote_char</tt> as this instance - # to produce an Array of headers. This - # setting causes CSV#shift() to return - # rows as CSV::Row objects instead of - # Arrays and CSV#read() to return - # CSV::Table objects instead of an Array - # of Arrays. - # <b><tt>:return_headers</tt></b>:: When +false+, header rows are silently - # swallowed. If set to +true+, header - # rows are returned in a CSV::Row object - # with identical headers and - # fields (save that the fields do not go - # through the converters). - # <b><tt>:write_headers</tt></b>:: When +true+ and <tt>:headers</tt> is - # set, a header row will be added to the - # output. 
- # <b><tt>:header_converters</tt></b>:: Identical in functionality to - # <tt>:converters</tt> save that the - # conversions are only made to header - # rows. All built-in converters try to - # transcode headers to UTF-8 before - # converting. The conversion will fail - # if the data cannot be transcoded, - # leaving the header unchanged. - # <b><tt>:skip_blanks</tt></b>:: When set to a +true+ value, CSV will - # skip over any empty rows. Note that - # this setting will not skip rows that - # contain column separators, even if - # the rows contain no actual data. If - # you want to skip rows that contain - # separators but no content, consider - # using <tt>:skip_lines</tt>, or - # inspecting fields.compact.empty? on - # each row. - # <b><tt>:force_quotes</tt></b>:: When set to a +true+ value, CSV will - # quote all CSV fields it creates. - # <b><tt>:skip_lines</tt></b>:: When set to an object responding to - # <tt>match</tt>, every line matching - # it is considered a comment and ignored - # during parsing. When set to a String, - # it is first converted to a Regexp. - # When set to +nil+ no line is considered - # a comment. If the passed object does - # not respond to <tt>match</tt>, - # <tt>ArgumentError</tt> is thrown. - # - # See CSV::DEFAULT_OPTIONS for the default settings. - # - # Options cannot be overridden in the instance methods for performance reasons, - # so be sure to set what you want here. - # - def initialize(data, options = Hash.new) - if data.nil? - raise ArgumentError.new("Cannot parse nil as CSV") - end - - # build the options for this read/write - options = DEFAULT_OPTIONS.merge(options) + # Returns the encoding used for parsing and generating; + # see {Character Encodings (M17n or Multilingualization)}[#class-CSV-label-Character+Encodings+-28M17n+or+Multilingualization-29]: + # CSV.new('').encoding # => #<Encoding:UTF-8> + attr_reader :encoding - # create the IO object we will read from - @io = data.is_a?(String) ? 
StringIO.new(data) : data - # honor the IO encoding if we can, otherwise default to ASCII-8BIT - @encoding = raw_encoding(nil) || - ( if encoding = options.delete(:internal_encoding) - case encoding - when Encoding; encoding - else Encoding.find(encoding) - end - end ) || - ( case encoding = options.delete(:encoding) - when Encoding; encoding - when /\A[^:]+/; Encoding.find($&) - end ) || - Encoding.default_internal || Encoding.default_external - # - # prepare for building safe regular expressions in the target encoding, - # if we can transcode the needed characters - # - @re_esc = "\\".encode(@encoding) rescue "" - @re_chars = /#{%"[-\\]\\[\\.^$?*+{}()|# \r\n\t\f\v]".encode(@encoding)}/ - - init_separators(options) - init_parsers(options) - init_converters(options) - init_headers(options) - init_comments(options) - - @force_encoding = !!(encoding || options.delete(:encoding)) - options.delete(:internal_encoding) - options.delete(:external_encoding) - unless options.empty? - raise ArgumentError, "Unknown options: #{options.keys.join(', ')}." + # :call-seq: + # csv.lineno -> integer + # + # Returns the count of the rows parsed or generated. + # + # Parsing: + # string = "foo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # CSV.open(path) do |csv| + # csv.each do |row| + # p [csv.lineno, row] + # end + # end + # Output: + # [1, ["foo", "0"]] + # [2, ["bar", "1"]] + # [3, ["baz", "2"]] + # + # Generating: + # CSV.generate do |csv| + # p csv.lineno; csv << ['foo', 0] + # p csv.lineno; csv << ['bar', 1] + # p csv.lineno; csv << ['baz', 2] + # end + # Output: + # 0 + # 1 + # 2 + def lineno + if @writer + @writer.lineno + else + parser.lineno end - - # track our own lineno since IO gets confused about line-ends is CSV fields - @lineno = 0 end - # - # The encoded <tt>:col_sep</tt> used in parsing and writing. See CSV::new - # for details. - # - attr_reader :col_sep - # - # The encoded <tt>:row_sep</tt> used in parsing and writing. 
See CSV::new - # for details. - # - attr_reader :row_sep - # - # The encoded <tt>:quote_char</tt> used in parsing and writing. See CSV::new - # for details. - # - attr_reader :quote_char - # The limit for field size, if any. See CSV::new for details. - attr_reader :field_size_limit + # :call-seq: + # csv.line -> array + # + # Returns the line most recently read: + # string = "foo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # CSV.open(path) do |csv| + # csv.each do |row| + # p [csv.lineno, csv.line] + # end + # end + # Output: + # [1, "foo,0\n"] + # [2, "bar,1\n"] + # [3, "baz,2\n"] + def line + parser.line + end - # The regex marking a line as a comment. See CSV::new for details - attr_reader :skip_lines + ### IO and StringIO Delegation ### - # - # Returns the current list of converters in effect. See CSV::new for details. - # Built-in converters will be returned by name, while others will be returned - # as is. - # - def converters - @converters.map do |converter| - name = Converters.rassoc(converter) - name ? name.first : converter + extend Forwardable + def_delegators :@io, :binmode, :close, :close_read, :close_write, + :closed?, :external_encoding, :fcntl, + :fileno, :flush, :fsync, :internal_encoding, + :isatty, :pid, :pos, :pos=, :reopen, + :seek, :string, :sync, :sync=, :tell, + :truncate, :tty? + + def binmode? + if @io.respond_to?(:binmode?) + @io.binmode? + else + false end end - # - # Returns +true+ if unconverted_fields() to parsed results. See CSV::new - # for details. - # - def unconverted_fields?() @unconverted_fields end - # - # Returns +nil+ if headers will not be used, +true+ if they will but have not - # yet been read, or the actual headers after they have been read. See - # CSV::new for details. - # - def headers - @headers || true if @use_headers + + def flock(*args) + raise NotImplementedError unless @io.respond_to?(:flock) + @io.flock(*args) end - # - # Returns +true+ if headers will be returned as a row of results. 
- # See CSV::new for details. - # - def return_headers?() @return_headers end - # Returns +true+ if headers are written in output. See CSV::new for details. - def write_headers?() @write_headers end - # - # Returns the current list of converters in effect for headers. See CSV::new - # for details. Built-in converters will be returned by name, while others - # will be returned as is. - # - def header_converters - @header_converters.map do |converter| - name = HeaderConverters.rassoc(converter) - name ? name.first : converter - end + + def ioctl(*args) + raise NotImplementedError unless @io.respond_to?(:ioctl) + @io.ioctl(*args) end - # - # Returns +true+ blank lines are skipped by the parser. See CSV::new - # for details. - # - def skip_blanks?() @skip_blanks end - # Returns +true+ if all output fields are quoted. See CSV::new for details. - def force_quotes?() @force_quotes end - # - # The Encoding CSV is parsing or writing in. This will be the Encoding you - # receive parsed data in and/or the Encoding data will be written in. - # - attr_reader :encoding + def path + @io.path if @io.respond_to?(:path) + end - # - # The line number of the last row read from this file. Fields with nested - # line-end characters will not affect this count. - # - attr_reader :lineno + def stat(*args) + raise NotImplementedError unless @io.respond_to?(:stat) + @io.stat(*args) + end - ### IO and StringIO Delegation ### + def to_i + raise NotImplementedError unless @io.respond_to?(:to_i) + @io.to_i + end - extend Forwardable - def_delegators :@io, :binmode, :binmode?, :close, :close_read, :close_write, - :closed?, :eof, :eof?, :external_encoding, :fcntl, - :fileno, :flock, :flush, :fsync, :internal_encoding, - :ioctl, :isatty, :path, :pid, :pos, :pos=, :reopen, - :seek, :stat, :string, :sync, :sync=, :tell, :to_i, - :to_io, :truncate, :tty? + def to_io + @io.respond_to?(:to_io) ? @io.to_io : @io + end + + def eof? 
+ return false if @eof_error + begin + parser_enumerator.peek + false + rescue MalformedCSVError => error + @eof_error = error + false + rescue StopIteration + true + end + end + alias_method :eof, :eof? # Rewinds the underlying IO object and resets CSV's lineno() counter. def rewind - @headers = nil - @lineno = 0 - + @parser = nil + @parser_enumerator = nil + @eof_error = nil + @writer.rewind if @writer @io.rewind end ### End Delegation ### - # - # The primary write method for wrapped Strings and IOs, +row+ (an Array or - # CSV::Row) is converted to CSV and appended to the data source. When a - # CSV::Row is passed, only the row's fields() are appended to the output. - # - # The data source must be open for writing. - # + # :call-seq: + # csv << row -> self + # + # Appends a row to +self+. + # + # - Argument +row+ must be an \Array object or a CSV::Row object. + # - The output stream must be open for writing. + # + # --- + # + # Append Arrays: + # CSV.generate do |csv| + # csv << ['foo', 0] + # csv << ['bar', 1] + # csv << ['baz', 2] + # end # => "foo,0\nbar,1\nbaz,2\n" + # + # Append CSV::Rows: + # headers = [] + # CSV.generate do |csv| + # csv << CSV::Row.new(headers, ['foo', 0]) + # csv << CSV::Row.new(headers, ['bar', 1]) + # csv << CSV::Row.new(headers, ['baz', 2]) + # end # => "foo,0\nbar,1\nbaz,2\n" + # + # Headers in CSV::Row objects are not appended: + # headers = ['Name', 'Count'] + # CSV.generate do |csv| + # csv << CSV::Row.new(headers, ['foo', 0]) + # csv << CSV::Row.new(headers, ['bar', 1]) + # csv << CSV::Row.new(headers, ['baz', 2]) + # end # => "foo,0\nbar,1\nbaz,2\n" + # + # --- + # + # Raises an exception if +row+ is not an \Array or \CSV::Row: + # CSV.generate do |csv| + # # Raises NoMethodError (undefined method `collect' for :foo:Symbol) + # csv << :foo + # end + # + # Raises an exception if the output stream is not opened for writing: + # path = 't.csv' + # File.write(path, '') + # File.open(path) do |file| + # CSV.open(file) do |csv| + # # 
Raises IOError (not opened for writing) + # csv << ['foo', 0] + # end + # end def <<(row) - # make sure headers have been assigned - if header_row? and [Array, String].include? @use_headers.class - parse_headers # won't read data for Array or String - self << @headers if @write_headers - end - - # handle CSV::Row objects and Hashes - row = case row - when self.class::Row then row.fields - when Hash then @headers.map { |header| row[header] } - else row - end - - @headers = row if header_row? - @lineno += 1 - - output = row.map(&@quote).join(@col_sep) + @row_sep # quote and separate - if @io.is_a?(StringIO) and - output.encoding != (encoding = raw_encoding) - if @force_encoding - output = output.encode(encoding) - elsif (compatible_encoding = Encoding.compatible?(@io.string, output)) - @io.set_encoding(compatible_encoding) - @io.seek(0, IO::SEEK_END) - end - end - @io << output - - self # for chaining + writer << row + self end alias_method :add_row, :<< alias_method :puts, :<< - # # :call-seq: - # convert( name ) - # convert { |field| ... } - # convert { |field, field_info| ... } - # - # You can use this method to install a CSV::Converters built-in, or provide a - # block that handles a custom conversion. - # - # If you provide a block that takes one argument, it will be passed the field - # and is expected to return the converted value or the field itself. If your - # block takes two arguments, it will also be passed a CSV::FieldInfo Struct, - # containing details about the field. Again, the block should return a - # converted field or the field itself. - # + # convert(converter_name) -> array_of_procs + # convert {|field, field_info| ... } -> array_of_procs + # + # - With no block, installs a field converter (a \Proc). + # - With a block, defines and installs a custom field converter. + # - Returns the \Array of installed field converters. + # + # - Argument +converter_name+, if given, should be the name + # of an existing field converter. 
+ # + # See {Field Converters}[#class-CSV-label-Field+Converters]. + # --- + # + # With no block, installs a field converter: + # csv = CSV.new('') + # csv.convert(:integer) + # csv.convert(:float) + # csv.convert(:date) + # csv.converters # => [:integer, :float, :date] + # + # --- + # + # The block, if given, is called for each field: + # - Argument +field+ is the field value. + # - Argument +field_info+ is a CSV::FieldInfo object + # containing details about the field. + # + # The examples here assume the prior execution of: + # string = "foo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # + # Example giving a block: + # csv = CSV.open(path) + # csv.convert {|field, field_info| p [field, field_info]; field.upcase } + # csv.read # => [["FOO", "0"], ["BAR", "1"], ["BAZ", "2"]] + # + # Output: + # ["foo", #<struct CSV::FieldInfo index=0, line=1, header=nil>] + # ["0", #<struct CSV::FieldInfo index=1, line=1, header=nil>] + # ["bar", #<struct CSV::FieldInfo index=0, line=2, header=nil>] + # ["1", #<struct CSV::FieldInfo index=1, line=2, header=nil>] + # ["baz", #<struct CSV::FieldInfo index=0, line=3, header=nil>] + # ["2", #<struct CSV::FieldInfo index=1, line=3, header=nil>] + # + # The block need not return a \String object: + # csv = CSV.open(path) + # csv.convert {|field, field_info| field.to_sym } + # csv.read # => [[:foo, :"0"], [:bar, :"1"], [:baz, :"2"]] + # + # If +converter_name+ is given, the block is not called: + # csv = CSV.open(path) + # csv.convert(:integer) {|field, field_info| fail 'Cannot happen' } + # csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]] + # + # --- + # + # Raises a parse-time exception if +converter_name+ is not the name of a built-in + # field converter: + # csv = CSV.open(path) + # csv.convert(:nosuch) # => [nil] + # # Raises NoMethodError (undefined method `arity' for nil:NilClass) + # csv.read def convert(name = nil, &converter) - add_converter(:converters, self.class::Converters, name, &converter) + 
parser_fields_converter.add_converter(name, &converter) end - # # :call-seq: - # header_convert( name ) - # header_convert { |field| ... } - # header_convert { |field, field_info| ... } - # - # Identical to CSV#convert(), but for header rows. - # - # Note that this method must be called before header rows are read to have any - # effect. - # + # header_convert(converter_name) -> array_of_procs + # header_convert {|header, field_info| ... } -> array_of_procs + # + # - With no block, installs a header converter (a \Proc). + # - With a block, defines and installs a custom header converter. + # - Returns the \Array of installed header converters. + # + # - Argument +converter_name+, if given, should be the name + # of an existing header converter. + # + # See {Header Converters}[#class-CSV-label-Header+Converters]. + # --- + # + # With no block, installs a header converter: + # csv = CSV.new('') + # csv.header_convert(:symbol) + # csv.header_convert(:downcase) + # csv.header_converters # => [:symbol, :downcase] + # + # --- + # + # The block, if given, is called for each header: + # - Argument +header+ is the header value. + # - Argument +field_info+ is a CSV::FieldInfo object + # containing details about the header. 
+ # + # The examples here assume the prior execution of: + # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # + # Example giving a block: + # csv = CSV.open(path, headers: true) + # csv.header_convert {|header, field_info| p [header, field_info]; header.upcase } + # table = csv.read + # table # => #<CSV::Table mode:col_or_row row_count:4> + # table.headers # => ["NAME", "VALUE"] + # + # Output: + # ["Name", #<struct CSV::FieldInfo index=0, line=1, header=nil>] + # ["Value", #<struct CSV::FieldInfo index=1, line=1, header=nil>] + + # The block need not return a \String object: + # csv = CSV.open(path, headers: true) + # csv.header_convert {|header, field_info| header.to_sym } + # table = csv.read + # table.headers # => [:Name, :Value] + # + # If +converter_name+ is given, the block is not called: + # csv = CSV.open(path, headers: true) + # csv.header_convert(:downcase) {|header, field_info| fail 'Cannot happen' } + # table = csv.read + # table.headers # => ["name", "value"] + # --- + # + # Raises a parse-time exception if +converter_name+ is not the name of a built-in + # header converter: + # csv = CSV.open(path, headers: true) + # csv.header_convert(:nosuch) + # # Raises NoMethodError (undefined method `arity' for nil:NilClass) + # csv.read def header_convert(name = nil, &converter) - add_converter( :header_converters, - self.class::HeaderConverters, - name, - &converter ) + header_fields_converter.add_converter(name, &converter) end include Enumerable - # - # Yields each row of the data source in turn. - # - # Support for Enumerable. - # - # The data source must be open for reading. - # - def each - if block_given? - while row = shift - yield row - end - else - to_enum - end + # :call-seq: + # csv.each -> enumerator + # csv.each {|row| ...} + # + # Calls the block with each successive row. + # The data source must be opened for reading. 
+ # + # Without headers: + # string = "foo,0\nbar,1\nbaz,2\n" + # csv = CSV.new(string) + # csv.each do |row| + # p row + # end + # Output: + # ["foo", "0"] + # ["bar", "1"] + # ["baz", "2"] + # + # With headers: + # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # csv = CSV.new(string, headers: true) + # csv.each do |row| + # p row + # end + # Output: + # <CSV::Row "Name":"foo" "Value":"0"> + # <CSV::Row "Name":"bar" "Value":"1"> + # <CSV::Row "Name":"baz" "Value":"2"> + # + # --- + # + # Raises an exception if the source is not opened for reading: + # string = "foo,0\nbar,1\nbaz,2\n" + # csv = CSV.new(string) + # csv.close + # # Raises IOError (not opened for reading) + # csv.each do |row| + # p row + # end + def each(&block) + parser_enumerator.each(&block) end - # - # Slurps the remaining rows and returns an Array of Arrays. - # - # The data source must be open for reading. - # + # :call-seq: + # csv.read -> array or csv_table + # + # Forms the remaining rows from +self+ into: + # - A CSV::Table object, if headers are in use. + # - An \Array of Arrays, otherwise. + # + # The data source must be opened for reading. + # + # Without headers: + # string = "foo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # csv = CSV.open(path) + # csv.read # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] + # + # With headers: + # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # csv = CSV.open(path, headers: true) + # csv.read # => #<CSV::Table mode:col_or_row row_count:4> + # + # --- + # + # Raises an exception if the source is not opened for reading: + # string = "foo,0\nbar,1\nbaz,2\n" + # csv = CSV.new(string) + # csv.close + # # Raises IOError (not opened for reading) + # csv.read def read rows = to_a - if @use_headers - Table.new(rows) + if parser.use_headers? 
+ Table.new(rows, headers: parser.headers) else rows end end alias_method :readlines, :read - # Returns +true+ if the next row read will be a header row. + # :call-seq: + # csv.header_row? -> true or false + # + # Returns +true+ if the next row to be read is a header row\; + # +false+ otherwise. + # + # Without headers: + # string = "foo,0\nbar,1\nbaz,2\n" + # csv = CSV.new(string) + # csv.header_row? # => false + # + # With headers: + # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # csv = CSV.new(string, headers: true) + # csv.header_row? # => true + # csv.shift # => #<CSV::Row "Name":"foo" "Value":"0"> + # csv.header_row? # => false + # + # --- + # + # Raises an exception if the source is not opened for reading: + # string = "foo,0\nbar,1\nbaz,2\n" + # csv = CSV.new(string) + # csv.close + # # Raises IOError (not opened for reading) + # csv.header_row? def header_row? - @use_headers and @headers.nil? + parser.header_row? end - # - # The primary read method for wrapped Strings and IOs, a single row is pulled - # from the data source, parsed and returned as an Array of fields (if header - # rows are not used) or a CSV::Row (when header rows are used). - # - # The data source must be open for reading. - # + # :call-seq: + # csv.shift -> array, csv_row, or nil + # + # Returns the next row of data as: + # - An \Array if no headers are used. + # - A CSV::Row object if headers are used. + # + # The data source must be opened for reading. 
+ # + # Without headers: + # string = "foo,0\nbar,1\nbaz,2\n" + # csv = CSV.new(string) + # csv.shift # => ["foo", "0"] + # csv.shift # => ["bar", "1"] + # csv.shift # => ["baz", "2"] + # csv.shift # => nil + # + # With headers: + # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # csv = CSV.new(string, headers: true) + # csv.shift # => #<CSV::Row "Name":"foo" "Value":"0"> + # csv.shift # => #<CSV::Row "Name":"bar" "Value":"1"> + # csv.shift # => #<CSV::Row "Name":"baz" "Value":"2"> + # csv.shift # => nil + # + # --- + # + # Raises an exception if the source is not opened for reading: + # string = "foo,0\nbar,1\nbaz,2\n" + # csv = CSV.new(string) + # csv.close + # # Raises IOError (not opened for reading) + # csv.shift def shift - ######################################################################### - ### This method is purposefully kept a bit long as simple conditional ### - ### checks are faster than numerous (expensive) method calls. ### - ######################################################################### - - # handle headers not based on document content - if header_row? and @return_headers and - [Array, String].include? @use_headers.class - if @unconverted_fields - return add_unconverted_fields(parse_headers, Array.new) - else - return parse_headers - end + if @eof_error + eof_error, @eof_error = @eof_error, nil + raise eof_error end - - # - # it can take multiple calls to <tt>@io.gets()</tt> to get a full line, - # because of \r and/or \n characters embedded in quoted fields - # - in_extended_col = false - csv = Array.new - - loop do - # add another read to the line - unless parse = @io.gets(@row_sep) - return nil - end - - parse.sub!(@parsers[:line_end], "") - - if csv.empty? - # - # I believe a blank line should be an <tt>Array.new</tt>, not Ruby 1.8 - # CSV's <tt>[nil]</tt> - # - if parse.empty? 
- @lineno += 1 - if @skip_blanks - next - elsif @unconverted_fields - return add_unconverted_fields(Array.new, Array.new) - elsif @use_headers - return self.class::Row.new(Array.new, Array.new) - else - return Array.new - end - end - end - - next if @skip_lines and @skip_lines.match parse - - parts = parse.split(@col_sep, -1) - if parts.empty? - if in_extended_col - csv[-1] << @col_sep # will be replaced with a @row_sep after the parts.each loop - else - csv << nil - end - end - - # This loop is the hot path of csv parsing. Some things may be non-dry - # for a reason. Make sure to benchmark when refactoring. - parts.each do |part| - if in_extended_col - # If we are continuing a previous column - if part[-1] == @quote_char && part.count(@quote_char) % 2 != 0 - # extended column ends - csv.last << part[0..-2] - if csv.last =~ @parsers[:stray_quote] - raise MalformedCSVError, - "Missing or stray quote in line #{lineno + 1}" - end - csv.last.gsub!(@quote_char * 2, @quote_char) - in_extended_col = false - else - csv.last << part - csv.last << @col_sep - end - elsif part[0] == @quote_char - # If we are staring a new quoted column - if part[-1] != @quote_char || part.count(@quote_char) % 2 != 0 - # start an extended column - csv << part[1..-1] - csv.last << @col_sep - in_extended_col = true - else - # regular quoted column - csv << part[1..-2] - if csv.last =~ @parsers[:stray_quote] - raise MalformedCSVError, - "Missing or stray quote in line #{lineno + 1}" - end - csv.last.gsub!(@quote_char * 2, @quote_char) - end - elsif part =~ @parsers[:quote_or_nl] - # Unquoted field with bad characters. - if part =~ @parsers[:nl_or_lf] - raise MalformedCSVError, "Unquoted fields do not allow " + - "\\r or \\n (line #{lineno + 1})." - else - raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}." - end - else - # Regular ole unquoted field. - csv << (part.empty? ? 
nil : part) - end - end - - # Replace tacked on @col_sep with @row_sep if we are still in an extended - # column. - csv[-1][-1] = @row_sep if in_extended_col - - if in_extended_col - # if we're at eof?(), a quoted field wasn't closed... - if @io.eof? - raise MalformedCSVError, - "Unclosed quoted field on line #{lineno + 1}." - elsif @field_size_limit and csv.last.size >= @field_size_limit - raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}." - end - # otherwise, we need to loop and pull some more data to complete the row - else - @lineno += 1 - - # save fields unconverted fields, if needed... - unconverted = csv.dup if @unconverted_fields - - # convert fields, if needed... - csv = convert_fields(csv) unless @use_headers or @converters.empty? - # parse out header rows and handle CSV::Row conversions... - csv = parse_headers(csv) if @use_headers - - # inject unconverted fields and accessor, if requested... - if @unconverted_fields and not csv.respond_to? :unconverted_fields - add_unconverted_fields(csv, unconverted) - end - - # return the results - break csv - end + begin + parser_enumerator.next + rescue StopIteration + nil end end alias_method :gets, :shift alias_method :readline, :shift + # :call-seq: + # csv.inspect -> string # - # Returns a simplified description of the key CSV attributes in an - # ASCII compatible String. 
- # + # Returns a \String showing certain properties of +self+: + # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # csv = CSV.new(string, headers: true) + # s = csv.inspect + # s # => "#<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:\",\" row_sep:\"\\n\" quote_char:\"\\\"\" headers:true>" def inspect - str = ["<#", self.class.to_s, " io_type:"] + str = ["#<", self.class.to_s, " io_type:"] # show type of wrapped IO if @io == $stdout then str << "$stdout" elsif @io == $stdin then str << "$stdin" @@ -1935,15 +2695,18 @@ class CSV # show encoding str << " encoding:" << @encoding.name # show other attributes - %w[ lineno col_sep row_sep - quote_char skip_blanks ].each do |attr_name| - if a = instance_variable_get("@#{attr_name}") + ["lineno", "col_sep", "row_sep", "quote_char"].each do |attr_name| + if a = __send__(attr_name) str << " " << attr_name << ":" << a.inspect end end - if @use_headers - str << " headers:" << headers.inspect + ["skip_blanks", "liberal_parsing"].each do |attr_name| + if a = __send__("#{attr_name}?") + str << " " << attr_name << ":" << a.inspect + end end + _headers = headers + str << " headers:" << _headers.inspect if _headers str << ">" begin str.join('') @@ -1957,346 +2720,126 @@ class CSV private - # - # Stores the indicated separators for later use. - # - # If auto-discovery was requested for <tt>@row_sep</tt>, this method will read - # ahead in the <tt>@io</tt> and try to find one. +ARGF+, +STDIN+, +STDOUT+, - # +STDERR+ and any stream open for output only with a default - # <tt>@row_sep</tt> of <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>). - # - # This method also establishes the quoting rules used for CSV output. 
- # - def init_separators(options) - # store the selected separators - @col_sep = options.delete(:col_sep).to_s.encode(@encoding) - @row_sep = options.delete(:row_sep) # encode after resolving :auto - @quote_char = options.delete(:quote_char).to_s.encode(@encoding) + def determine_encoding(encoding, internal_encoding) + # honor the IO encoding if we can, otherwise default to ASCII-8BIT + io_encoding = raw_encoding + return io_encoding if io_encoding + + return Encoding.find(internal_encoding) if internal_encoding - if @quote_char.length != 1 - raise ArgumentError, ":quote_char has to be a single character String" + if encoding + encoding, = encoding.split(":", 2) if encoding.is_a?(String) + return Encoding.find(encoding) end - # - # automatically discover row separator when requested - # (not fully encoding safe) - # - if @row_sep == :auto - if [ARGF, STDIN, STDOUT, STDERR].include?(@io) or - (defined?(Zlib) and @io.class == Zlib::GzipWriter) - @row_sep = $INPUT_RECORD_SEPARATOR - else - begin - # - # remember where we were (pos() will raise an exception if @io is pipe - # or not opened for reading) - # - saved_pos = @io.pos - while @row_sep == :auto - # - # if we run out of data, it's probably a single line - # (ensure will set default value) - # - break unless sample = @io.gets(nil, 1024) - # extend sample if we're unsure of the line ending - if sample.end_with? encode_str("\r") - sample << (@io.gets(nil, 1) || "") - end - - # try to find a standard separator - if sample =~ encode_re("\r\n?|\n") - @row_sep = $& - break - end - end + Encoding.default_internal || Encoding.default_external + end - # tricky seek() clone to work around GzipReader's lack of seek() - @io.rewind - # reset back to the remembered position - while saved_pos > 1024 # avoid loading a lot of data into memory - @io.read(1024) - saved_pos -= 1024 - end - @io.read(saved_pos) if saved_pos.nonzero? 
- rescue IOError # not opened for reading - # do nothing: ensure will set default - rescue NoMethodError # Zlib::GzipWriter doesn't have some IO methods - # do nothing: ensure will set default - rescue SystemCallError # pipe - # do nothing: ensure will set default - ensure - # - # set default if we failed to detect - # (stream not opened for reading, a pipe, or a single line of data) - # - @row_sep = $INPUT_RECORD_SEPARATOR if @row_sep == :auto - end - end - end - @row_sep = @row_sep.to_s.encode(@encoding) - - # establish quoting rules - @force_quotes = options.delete(:force_quotes) - do_quote = lambda do |field| - field = String(field) - encoded_quote = @quote_char.encode(field.encoding) - encoded_quote + - field.gsub(encoded_quote, encoded_quote * 2) + - encoded_quote - end - quotable_chars = encode_str("\r\n", @col_sep, @quote_char) - @quote = if @force_quotes - do_quote - else - lambda do |field| - if field.nil? # represent +nil+ fields as empty unquoted fields - "" - else - field = String(field) # Stringify fields - # represent empty fields as empty quoted fields - if field.empty? or - field.count(quotable_chars).nonzero? - do_quote.call(field) - else - field # unquoted field - end - end + def normalize_converters(converters) + converters ||= [] + unless converters.is_a?(Array) + converters = [converters] + end + converters.collect do |converter| + case converter + when Proc # custom code block + [nil, converter] + else # by name + [converter, nil] end end end - # Pre-compiles parsers and stores them by name for access during reads. 
- def init_parsers(options) - # store the parser behaviors - @skip_blanks = options.delete(:skip_blanks) - @field_size_limit = options.delete(:field_size_limit) - - # prebuild Regexps for faster parsing - esc_row_sep = escape_re(@row_sep) - esc_quote = escape_re(@quote_char) - @parsers = { - # for detecting parse errors - quote_or_nl: encode_re("[", esc_quote, "\r\n]"), - nl_or_lf: encode_re("[\r\n]"), - stray_quote: encode_re( "[^", esc_quote, "]", esc_quote, - "[^", esc_quote, "]" ), - # safer than chomp!() - line_end: encode_re(esc_row_sep, "\\z"), - # illegal unquoted characters - return_newline: encode_str("\r\n") - } - end - - # - # Loads any converters requested during construction. # - # If +field_name+ is set <tt>:converters</tt> (the default) field converters - # are set. When +field_name+ is <tt>:header_converters</tt> header converters - # are added instead. - # - # The <tt>:unconverted_fields</tt> option is also actived for - # <tt>:converters</tt> calls, if requested. + # Processes +fields+ with <tt>@converters</tt>, or <tt>@header_converters</tt> + # if +headers+ is passed as +true+, returning the converted field set. Any + # converter that changes the field into something other than a String halts + # the pipeline of conversion for that field. This is primarily an efficiency + # shortcut. # - def init_converters(options, field_name = :converters) - if field_name == :converters - @unconverted_fields = options.delete(:unconverted_fields) + def convert_fields(fields, headers = false) + if headers + header_fields_converter.convert(fields, nil, 0) + else + parser_fields_converter.convert(fields, @headers, lineno) end + end - instance_variable_set("@#{field_name}", Array.new) - - # find the correct method to add the converters - convert = method(field_name.to_s.sub(/ers\Z/, "")) - - # load converters - unless options[field_name].nil? - # allow a single converter not wrapped in an Array - unless options[field_name].is_a? 
Array - options[field_name] = [options[field_name]] - end - # load each converter... - options[field_name].each do |converter| - if converter.is_a? Proc # custom code block - convert.call(&converter) - else # by name - convert.call(converter) - end - end + # + # Returns the encoding of the internal IO object. + # + def raw_encoding + if @io.respond_to? :internal_encoding + @io.internal_encoding || @io.external_encoding + elsif @io.respond_to? :encoding + @io.encoding + else + nil end - - options.delete(field_name) end - # Stores header row settings and loads header converters, if needed. - def init_headers(options) - @use_headers = options.delete(:headers) - @return_headers = options.delete(:return_headers) - @write_headers = options.delete(:write_headers) - - # headers must be delayed until shift(), in case they need a row of content - @headers = nil - - init_converters(options, :header_converters) + def parser_fields_converter + @parser_fields_converter ||= build_parser_fields_converter end - # Stores the pattern of comments to skip from the provided options. - # - # The pattern must respond to +.match+, else ArgumentError is raised. - # Strings are converted to a Regexp. - # - # See also CSV.new - def init_comments(options) - @skip_lines = options.delete(:skip_lines) - @skip_lines = Regexp.new(@skip_lines) if @skip_lines.is_a? String - if @skip_lines and not @skip_lines.respond_to?(:match) - raise ArgumentError, ":skip_lines has to respond to matches" - end + def build_parser_fields_converter + specific_options = { + builtin_converters_name: :Converters, + } + options = @base_fields_converter_options.merge(specific_options) + build_fields_converter(@initial_converters, options) end - # - # The actual work method for adding converters, used by both CSV.convert() and - # CSV.header_convert(). 
- # - # This method requires the +var_name+ of the instance variable to place the - # converters in, the +const+ Hash to lookup named converters in, and the - # normal parameters of the CSV.convert() and CSV.header_convert() methods. - # - def add_converter(var_name, const, name = nil, &converter) - if name.nil? # custom converter - instance_variable_get("@#{var_name}") << converter - else # named converter - combo = const[name] - case combo - when Array # combo converter - combo.each do |converter_name| - add_converter(var_name, const, converter_name) - end - else # individual named converter - instance_variable_get("@#{var_name}") << combo - end - end + + def header_fields_converter + @header_fields_converter ||= build_header_fields_converter end - # - # Processes +fields+ with <tt>@converters</tt>, or <tt>@header_converters</tt> - # if +headers+ is passed as +true+, returning the converted field set. Any - # converter that changes the field into something other than a String halts - # the pipeline of conversion for that field. This is primarily an efficiency - # shortcut. - # - def convert_fields(fields, headers = false) - # see if we are converting headers or fields - converters = headers ? @header_converters : @converters - - fields.map.with_index do |field, index| - converters.each do |converter| - break if field.nil? - field = if converter.arity == 1 # straight field converter - converter[field] - else # FieldInfo converter - header = @use_headers && !headers ? @headers[index] : nil - converter[field, FieldInfo.new(index, lineno, header)] - end - break unless field.is_a? 
String # short-circuit pipeline for speed - end - field # final state of each field, converted or original - end + def build_header_fields_converter + specific_options = { + builtin_converters_name: :HeaderConverters, + accept_nil: true, + } + options = @base_fields_converter_options.merge(specific_options) + build_fields_converter(@initial_header_converters, options) end - # - # This method is used to turn a finished +row+ into a CSV::Row. Header rows - # are also dealt with here, either by returning a CSV::Row with identical - # headers and fields (save that the fields do not go through the converters) - # or by reading past them to return a field row. Headers are also saved in - # <tt>@headers</tt> for use in future rows. - # - # When +nil+, +row+ is assumed to be a header row not based on an actual row - # of the stream. - # - def parse_headers(row = nil) - if @headers.nil? # header row - @headers = case @use_headers # save headers - # Array of headers - when Array then @use_headers - # CSV header String - when String - self.class.parse_line( @use_headers, - col_sep: @col_sep, - row_sep: @row_sep, - quote_char: @quote_char ) - # first row is headers - else row - end - - # prepare converted and unconverted copies - row = @headers if row.nil? - @headers = convert_fields(@headers, true) - @headers.each { |h| h.freeze if h.is_a? String } - - if @return_headers # return headers - return self.class::Row.new(@headers, row, true) - elsif not [Array, String].include? @use_headers.class # skip to field row - return shift - end - end + def writer_fields_converter + @writer_fields_converter ||= build_writer_fields_converter + end - self.class::Row.new(@headers, convert_fields(row)) # field row + def build_writer_fields_converter + build_fields_converter(@initial_write_converters, + @write_fields_converter_options) end - # - # This method injects an instance variable <tt>unconverted_fields</tt> into - # +row+ and an accessor method for +row+ called unconverted_fields(). 
The - # variable is set to the contents of +fields+. - # - def add_unconverted_fields(row, fields) - class << row - attr_reader :unconverted_fields + def build_fields_converter(initial_converters, options) + fields_converter = FieldsConverter.new(options) + normalize_converters(initial_converters).each do |name, converter| + fields_converter.add_converter(name, &converter) end - row.instance_eval { @unconverted_fields = fields } - row + fields_converter end - # - # This method is an encoding safe version of Regexp::escape(). It will escape - # any characters that would change the meaning of a regular expression in the - # encoding of +str+. Regular expression characters that cannot be transcoded - # to the target encoding will be skipped and no escaping will be performed if - # a backslash cannot be transcoded. - # - def escape_re(str) - str.gsub(@re_chars) {|c| @re_esc + c} + def parser + @parser ||= Parser.new(@io, parser_options) end - # - # Builds a regular expression in <tt>@encoding</tt>. All +chunks+ will be - # transcoded to that encoding. - # - def encode_re(*chunks) - Regexp.new(encode_str(*chunks)) + def parser_options + @parser_options.merge(header_fields_converter: header_fields_converter, + fields_converter: parser_fields_converter) end - # - # Builds a String in <tt>@encoding</tt>. All +chunks+ will be transcoded to - # that encoding. - # - def encode_str(*chunks) - chunks.map { |chunk| chunk.encode(@encoding.name) }.join('') + def parser_enumerator + @parser_enumerator ||= parser.parse end - private + def writer + @writer ||= Writer.new(@io, writer_options) + end - # - # Returns the encoding of the internal IO object or the +default+ if the - # encoding cannot be determined. - # - def raw_encoding(default = Encoding::ASCII_8BIT) - if @io.respond_to? :internal_encoding - @io.internal_encoding || @io.external_encoding - elsif @io.is_a? StringIO - @io.string.encoding - elsif @io.respond_to? 
:encoding - @io.encoding - else - default - end + def writer_options + @writer_options.merge(header_fields_converter: header_fields_converter, + fields_converter: writer_fields_converter) end end @@ -2316,26 +2859,17 @@ end # c.read.any? { |a| a.include?("zombies") } # } #=> false # -def CSV(*args, &block) - CSV.instance(*args, &block) -end - -class Array # :nodoc: - # Equivalent to CSV::generate_line(self, options) - # - # ["CSV", "data"].to_csv - # #=> "CSV,data\n" - def to_csv(options = Hash.new) - CSV.generate_line(self, options) - end +# CSV options may also be given. +# +# io = StringIO.new +# CSV(io, col_sep: ";") { |csv| csv << ["a", "b", "c"] } +# +# This API is not Ractor-safe. +# +def CSV(*args, **options, &block) + CSV.instance(*args, **options, &block) end -class String # :nodoc: - # Equivalent to CSV::parse_line(self, options) - # - # "CSV,data".parse_csv - # #=> ["CSV", "data"] - def parse_csv(options = Hash.new) - CSV.parse_line(self, options) - end -end +require_relative "csv/version" +require_relative "csv/core_ext/array" +require_relative "csv/core_ext/string" |
