summary | refs | log | tree | commit | diff
path: root/lib/csv
diff options
context:
space:
mode:
author Sutou Kouhei <kou@cozmixng.org> 2019-10-12 14:03:21 +0900
committer GitHub <noreply@github.com> 2019-10-12 14:03:21 +0900
commit 92df7d98b62f48cf21cdec522f2e7b34380fd718 (patch)
tree a0d169e177ebd5607caefa26cef90cc70df48232 /lib/csv
parent d6e68bb263e79cb802fa683d9c4139ddca2fd4f5 (diff)
Import CSV 3.1.2 (#2547)
Notes
Notes: Merged-By: kou <kou@clear-code.com>
Diffstat (limited to 'lib/csv')
-rw-r--r-- lib/csv/fields_converter.rb 6
-rw-r--r-- lib/csv/parser.rb 60
-rw-r--r-- lib/csv/row.rb 30
-rw-r--r-- lib/csv/table.rb 54
-rw-r--r-- lib/csv/version.rb 2
-rw-r--r-- lib/csv/writer.rb 11
6 files changed, 113 insertions, 50 deletions
diff --git a/lib/csv/fields_converter.rb b/lib/csv/fields_converter.rb
index c2fa5798ff..a751c9ea1d 100644
--- a/lib/csv/fields_converter.rb
+++ b/lib/csv/fields_converter.rb
@@ -1,8 +1,14 @@
# frozen_string_literal: true
class CSV
+ # Note: Don't use this class directly. This is an internal class.
class FieldsConverter
include Enumerable
+ #
+ # A CSV::FieldsConverter is a data structure for storing the
+ # fields converter properties to be passed as a parameter
+ # when parsing a new file (e.g. CSV::Parser.new(@io, parser_options))
+ #
def initialize(options={})
@converters = []
diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index 2ef2a28ff3..42145f8923 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -11,10 +11,31 @@ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
using CSV::MatchP if CSV.const_defined?(:MatchP)
class CSV
+ # Note: Don't use this class directly. This is an internal class.
class Parser
+ #
+ # A CSV::Parser is m17n aware. The parser works in the Encoding of the IO
+ # or String object being read from or written to. Your data is never transcoded
+ # (unless you ask Ruby to transcode it for you) and will literally be parsed in
+ # the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
+ # Encoding of your data. This is accomplished by transcoding the parser itself
+ # into your Encoding.
+ #
+
+ # Raised when encoding is invalid.
class InvalidEncoding < StandardError
end
+ #
+ # CSV::Scanner receives a CSV output, scans it and returns the content.
+ # It also controls the life cycle of the object with its methods +keep_start+,
+ # +keep_end+, +keep_back+, +keep_drop+.
+ #
+ # Uses StringScanner (the official strscan gem). Strscan provides lexical
+ # scanning operations on a String. We inherit from it and take advantage
+ # of its methods. For more information, please visit:
+ # https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html
+ #
class Scanner < StringScanner
alias_method :scan_all, :scan
@@ -38,7 +59,7 @@ class CSV
def keep_end
start = @keeps.pop
- string[start, pos - start]
+ string.byteslice(start, pos - start)
end
def keep_back
@@ -50,6 +71,18 @@ class CSV
end
end
+ #
+ # CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
+ # It also controls the life cycle of the object with its methods +keep_start+,
+ # +keep_end+, +keep_back+, +keep_drop+.
+ #
+ # CSV::InputsScanner.scan() tries to match with pattern at the current position.
+ # If there's a match, the scanner advances the “scan pointer” and returns the matched string.
+ # Otherwise, the scanner returns nil.
+ #
+ # CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
+ # If there is no more data (eos? = true), it returns "".
+ #
class InputsScanner
def initialize(inputs, encoding, chunk_size: 8192)
@inputs = inputs.dup
@@ -137,7 +170,7 @@ class CSV
def keep_end
start, buffer = @keeps.pop
- keep = @scanner.string[start, @scanner.pos - start]
+ keep = @scanner.string.byteslice(start, @scanner.pos - start)
if buffer
buffer << keep
keep = buffer
@@ -192,7 +225,7 @@ class CSV
input = @inputs.first
case input
when StringIO
- string = input.string
+ string = input.read
raise InvalidEncoding unless string.valid_encoding?
@scanner = StringScanner.new(string)
@inputs.shift
@@ -319,6 +352,7 @@ class CSV
end
private
+ # A set of tasks to prepare the file in order to parse it
def prepare
prepare_variable
prepare_quote_character
@@ -447,7 +481,13 @@ class CSV
end
def prepare_separators
- @column_separator = @options[:column_separator].to_s.encode(@encoding)
+ column_separator = @options[:column_separator]
+ @column_separator = column_separator.to_s.encode(@encoding)
+ if @column_separator.size < 1
+ message = ":col_sep must be 1 or more characters: "
+ message += column_separator.inspect
+ raise ArgumentError, message
+ end
@row_separator =
resolve_row_separator(@options[:row_separator]).encode(@encoding)
@@ -534,7 +574,9 @@ class CSV
cr = "\r".encode(@encoding)
lf = "\n".encode(@encoding)
if @input.is_a?(StringIO)
- separator = detect_row_separator(@input.string, cr, lf)
+ pos = @input.pos
+ separator = detect_row_separator(@input.read, cr, lf)
+ @input.seek(pos)
elsif @input.respond_to?(:gets)
if @input.is_a?(File)
chunk_size = 32 * 1024
@@ -651,7 +693,9 @@ class CSV
return false if @quote_character.nil?
if @input.is_a?(StringIO)
- sample = @input.string
+ pos = @input.pos
+ sample = @input.read
+ @input.seek(pos)
else
return false if @samples.empty?
sample = @samples.first
@@ -684,7 +728,7 @@ class CSV
UnoptimizedStringIO.new(sample)
end
if @input.is_a?(StringIO)
- inputs << UnoptimizedStringIO.new(@input.string)
+ inputs << UnoptimizedStringIO.new(@input.read)
else
inputs << @input
end
@@ -697,7 +741,7 @@ class CSV
def build_scanner
string = nil
if @samples.empty? and @input.is_a?(StringIO)
- string = @input.string
+ string = @input.read
elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof?
string = @samples[0]
end
diff --git a/lib/csv/row.rb b/lib/csv/row.rb
index 1e1f27587b..4aa0f30911 100644
--- a/lib/csv/row.rb
+++ b/lib/csv/row.rb
@@ -4,7 +4,7 @@ require "forwardable"
class CSV
#
- # A CSV::Row is part Array and part Hash. It retains an order for the fields
+ # A CSV::Row is part Array and part Hash. It retains an order for the fields
# and allows duplicates just as an Array would, but also allows you to access
# fields by name just as you could if they were in a Hash.
#
@@ -13,13 +13,13 @@ class CSV
#
class Row
#
- # Construct a new CSV::Row from +headers+ and +fields+, which are expected
- # to be Arrays. If one Array is shorter than the other, it will be padded
+ # Constructs a new CSV::Row from +headers+ and +fields+, which are expected
+ # to be Arrays. If one Array is shorter than the other, it will be padded
# with +nil+ objects.
#
# The optional +header_row+ parameter can be set to +true+ to indicate, via
# CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
- # row. Otherwise, the row is assumes to be a field row.
+ # row. Otherwise, the row is assumed to be a field row.
#
# A CSV::Row object supports the following Array methods through delegation:
#
@@ -74,11 +74,11 @@ class CSV
# field( header, offset )
# field( index )
#
- # This method will return the field value by +header+ or +index+. If a field
+ # This method will return the field value by +header+ or +index+. If a field
# is not found, +nil+ is returned.
#
# When provided, +offset+ ensures that a header match occurs on or later
- # than the +offset+ index. You can use this to find duplicate headers,
+ # than the +offset+ index. You can use this to find duplicate headers,
# without resorting to hard-coding exact indices.
#
def field(header_or_index, minimum_index = 0)
@@ -142,7 +142,7 @@ class CSV
# assigns the +value+.
#
# Assigning past the end of the row with an index will set all pairs between
- # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
+ # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
# pair.
#
def []=(*args)
@@ -172,8 +172,8 @@ class CSV
# <<( header_and_field_hash )
#
# If a two-element Array is provided, it is assumed to be a header and field
- # and the pair is appended. A Hash works the same way with the key being
- # the header and the value being the field. Anything else is assumed to be
+ # and the pair is appended. A Hash works the same way with the key being
+ # the header and the value being the field. Anything else is assumed to be
# a lone field which is appended with a +nil+ header.
#
# This method returns the row for chaining.
@@ -191,7 +191,7 @@ class CSV
end
#
- # A shortcut for appending multiple fields. Equivalent to:
+ # A shortcut for appending multiple fields. Equivalent to:
#
# args.each { |arg| csv_row << arg }
#
@@ -209,8 +209,8 @@ class CSV
# delete( header, offset )
# delete( index )
#
- # Used to remove a pair from the row by +header+ or +index+. The pair is
- # located as described in CSV::Row.field(). The deleted pair is returned,
+ # Removes a pair from the row by +header+ or +index+. The pair is
+ # located as described in CSV::Row.field(). The deleted pair is returned,
# or +nil+ if a pair could not be found.
#
def delete(header_or_index, minimum_index = 0)
@@ -325,7 +325,7 @@ class CSV
end
#
- # Collapses the row into a simple Hash. Be warned that this discards field
+ # Collapses the row into a simple Hash. Be warned that this discards field
# order and clobbers duplicate fields.
#
def to_h
@@ -340,7 +340,7 @@ class CSV
alias_method :to_ary, :to_a
#
- # Returns the row as a CSV String. Headers are not used. Equivalent to:
+ # Returns the row as a CSV String. Headers are not used. Equivalent to:
#
# csv_row.fields.to_csv( options )
#
@@ -367,7 +367,9 @@ class CSV
end
end
+ #
# A summary of fields, by header, in an ASCII compatible String.
+ #
def inspect
str = ["#<", self.class.to_s]
each do |header, field|
diff --git a/lib/csv/table.rb b/lib/csv/table.rb
index 29b188a6d7..e6c1ee11fa 100644
--- a/lib/csv/table.rb
+++ b/lib/csv/table.rb
@@ -5,7 +5,7 @@ require "forwardable"
class CSV
#
# A CSV::Table is a two-dimensional data structure for representing CSV
- # documents. Tables allow you to work with the data by row or column,
+ # documents. Tables allow you to work with the data by row or column,
# manipulate the data, and even convert the results back to CSV, if needed.
#
# All tables returned by CSV will be constructed from this class, if header
@@ -13,8 +13,8 @@ class CSV
#
class Table
#
- # Construct a new CSV::Table from +array_of_rows+, which are expected
- # to be CSV::Row objects. All rows are assumed to have the same headers.
+ # Constructs a new CSV::Table from +array_of_rows+, which are expected
+ # to be CSV::Row objects. All rows are assumed to have the same headers.
#
# The optional +headers+ parameter can be set to Array of headers.
# If headers aren't set, headers are fetched from CSV::Row objects.
@@ -55,11 +55,11 @@ class CSV
def_delegators :@table, :empty?, :length, :size
#
- # Returns a duplicate table object, in column mode. This is handy for
+ # Returns a duplicate table object, in column mode. This is handy for
# chaining in a single call without changing the table mode, but be aware
# that this method can consume a fair amount of memory for bigger data sets.
#
- # This method returns the duplicate table for chaining. Don't chain
+ # This method returns the duplicate table for chaining. Don't chain
# destructive methods (like []=()) this way though, since you are working
# with a duplicate.
#
@@ -68,7 +68,7 @@ class CSV
end
#
- # Switches the mode of this table to column mode. All calls to indexing and
+ # Switches the mode of this table to column mode. All calls to indexing and
# iteration methods will work with columns until the mode is changed again.
#
# This method returns the table and is safe to chain.
@@ -80,7 +80,7 @@ class CSV
end
#
- # Returns a duplicate table object, in mixed mode. This is handy for
+ # Returns a duplicate table object, in mixed mode. This is handy for
# chaining in a single call without changing the table mode, but be aware
# that this method can consume a fair amount of memory for bigger data sets.
#
@@ -93,9 +93,9 @@ class CSV
end
#
- # Switches the mode of this table to mixed mode. All calls to indexing and
+ # Switches the mode of this table to mixed mode. All calls to indexing and
# iteration methods will use the default intelligent indexing system until
- # the mode is changed again. In mixed mode an index is assumed to be a row
+ # the mode is changed again. In mixed mode an index is assumed to be a row
# reference while anything else is assumed to be column access by headers.
#
# This method returns the table and is safe to chain.
@@ -120,7 +120,7 @@ class CSV
end
#
- # Switches the mode of this table to row mode. All calls to indexing and
+ # Switches the mode of this table to row mode. All calls to indexing and
# iteration methods will work with rows until the mode is changed again.
#
# This method returns the table and is safe to chain.
@@ -146,7 +146,7 @@ class CSV
#
# In the default mixed mode, this method returns rows for index access and
- # columns for header access. You can force the index association by first
+ # columns for header access. You can force the index association by first
# calling by_col!() or by_row!().
#
# Columns are returned as an Array of values. Altering that Array has no
@@ -163,18 +163,18 @@ class CSV
#
# In the default mixed mode, this method assigns rows for index access and
- # columns for header access. You can force the index association by first
+ # columns for header access. You can force the index association by first
# calling by_col!() or by_row!().
#
# Rows may be set to an Array of values (which will inherit the table's
# headers()) or a CSV::Row.
#
# Columns may be set to a single value, which is copied to each row of the
- # column, or an Array of values. Arrays of values are assigned to rows top
- # to bottom in row major order. Excess values are ignored and if the Array
+ # column, or an Array of values. Arrays of values are assigned to rows top
+ # to bottom in row major order. Excess values are ignored and if the Array
# does not have a value for each row the extra rows will receive a +nil+.
#
- # Assigning to an existing column or row clobbers the data. Assigning to
+ # Assigning to an existing column or row clobbers the data. Assigning to
# new columns creates them at the right end of the table.
#
def []=(index_or_header, value)
@@ -212,9 +212,9 @@ class CSV
#
# The mixed mode default is to treat a list of indices as row access,
- # returning the rows indicated. Anything else is considered columnar
- # access. For columnar access, the return set has an Array for each row
- # with the values indicated by the headers in each Array. You can force
+ # returning the rows indicated. Anything else is considered columnar
+ # access. For columnar access, the return set has an Array for each row
+ # with the values indicated by the headers in each Array. You can force
# column or row mode using by_col!() or by_row!().
#
# You cannot mix column and row access.
@@ -234,7 +234,7 @@ class CSV
end
#
- # Adds a new row to the bottom end of this table. You can provide an Array,
+ # Adds a new row to the bottom end of this table. You can provide an Array,
# which will be converted to a CSV::Row (inheriting the table's headers()),
# or a CSV::Row.
#
@@ -251,7 +251,7 @@ class CSV
end
#
- # A shortcut for appending multiple rows. Equivalent to:
+ # A shortcut for appending multiple rows. Equivalent to:
#
# rows.each { |row| self << row }
#
@@ -264,9 +264,9 @@ class CSV
end
#
- # Removes and returns the indicated columns or rows. In the default mixed
+ # Removes and returns the indicated columns or rows. In the default mixed
# mode indices refer to rows and everything else is assumed to be a column
- # headers. Use by_col!() or by_row!() to force the lookup.
+ # headers. Use by_col!() or by_row!() to force the lookup.
#
def delete(*indexes_or_headers)
if indexes_or_headers.empty?
@@ -293,9 +293,9 @@ class CSV
end
#
- # Removes any column or row for which the block returns +true+. In the
+ # Removes any column or row for which the block returns +true+. In the
# default mixed mode or row mode, iteration is the standard row major
- # walking of rows. In column mode, iteration will +yield+ two element
+ # walking of rows. In column mode, iteration will +yield+ two element
# tuples containing the column name and an Array of values for that column.
#
# This method returns the table for chaining.
@@ -321,7 +321,7 @@ class CSV
#
# In the default mixed mode or row mode, iteration is the standard row major
- # walking of rows. In column mode, iteration will +yield+ two element
+ # walking of rows. In column mode, iteration will +yield+ two element
# tuples containing the column name and an Array of values for that column.
#
# This method returns the table for chaining.
@@ -347,7 +347,7 @@ class CSV
end
#
- # Returns the table as an Array of Arrays. Headers will be the first row,
+ # Returns the table as an Array of Arrays. Headers will be the first row,
# then all of the field rows will follow.
#
def to_a
@@ -360,7 +360,7 @@ class CSV
end
#
- # Returns the table as a complete CSV String. Headers will be listed first,
+ # Returns the table as a complete CSV String. Headers will be listed first,
# then all of the field rows.
#
# This method assumes you want the Table.headers(), unless you explicitly
diff --git a/lib/csv/version.rb b/lib/csv/version.rb
index ce55373f02..072400fe01 100644
--- a/lib/csv/version.rb
+++ b/lib/csv/version.rb
@@ -2,5 +2,5 @@
class CSV
# The version of the installed library.
- VERSION = "3.1.1"
+ VERSION = "3.1.2"
end
diff --git a/lib/csv/writer.rb b/lib/csv/writer.rb
index 1682ac03ea..9243d23641 100644
--- a/lib/csv/writer.rb
+++ b/lib/csv/writer.rb
@@ -6,7 +6,12 @@ require_relative "row"
using CSV::MatchP if CSV.const_defined?(:MatchP)
class CSV
+ # Note: Don't use this class directly. This is an internal class.
class Writer
+ #
+ # A CSV::Writer receives an output, prepares the header, format and output.
+ # It allows us to write new rows in the object and rewind it.
+ #
attr_reader :lineno
attr_reader :headers
@@ -22,6 +27,9 @@ class CSV
@fields_converter = @options[:fields_converter]
end
+ #
+ # Adds a new row
+ #
def <<(row)
case row
when Row
@@ -47,6 +55,9 @@ class CSV
self
end
+ #
+ # Winds back to the beginning
+ #
def rewind
@lineno = 0
@headers = nil if @options[:headers].nil?