Import CSV 3.1.2 (#2547)

author: Sutou Kouhei <kou@cozmixng.org> 2019-10-12 14:03:21 +0900
committer: GitHub <noreply@github.com> 2019-10-12 14:03:21 +0900
commit: 92df7d98b62f48cf21cdec522f2e7b34380fd718 (patch)
tree: a0d169e177ebd5607caefa26cef90cc70df48232 /lib/csv
parent: d6e68bb263e79cb802fa683d9c4139ddca2fd4f5 (diff)
6 files changed, 113 insertions, 50 deletions
diff --git a/lib/csv/fields_converter.rb b/lib/csv/fields_converter.rb
index c2fa5798ff..a751c9ea1d 100644
--- a/lib/csv/fields_converter.rb
+++ b/lib/csv/fields_converter.rb
@@ -1,8 +1,14 @@
 # frozen_string_literal: true
 
 class CSV
+  # Note: Don't use this class directly. This is an internal class.
   class FieldsConverter
     include Enumerable
+    #
+    # A CSV::FieldsConverter is a data structure for storing the
+    # fields converter properties to be passed as a parameter
+    # when parsing a new file (e.g. CSV::Parser.new(@io, parser_options))
+    #
 
     def initialize(options={})
       @converters = []
diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index 2ef2a28ff3..42145f8923 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -11,10 +11,31 @@ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
 using CSV::MatchP if CSV.const_defined?(:MatchP)
 
 class CSV
+  # Note: Don't use this class directly. This is an internal class.
   class Parser
+    #
+    # A CSV::Parser is m17n aware. The parser works in the Encoding of the IO
+    # or String object being read from or written to. Your data is never transcoded
+    # (unless you ask Ruby to transcode it for you) and will literally be parsed in
+    # the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
+    # Encoding of your data. This is accomplished by transcoding the parser itself
+    # into your Encoding.
+    #
+
+    # Raised when encoding is invalid.
     class InvalidEncoding < StandardError
     end
 
+    #
+    # CSV::Scanner receives a CSV output, scans it and returns the content.
+    # It also controls the life cycle of the object with its methods +keep_start+,
+    # +keep_end+, +keep_back+, +keep_drop+.
+    #
+    # Uses StringScanner (the official strscan gem). Strscan provides lexical
+    # scanning operations on a String. We inherit from it and take advantage
+    # of its methods. For more information, please visit:
+    # https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html
+    #
     class Scanner < StringScanner
       alias_method :scan_all, :scan
 
@@ -38,7 +59,7 @@ class CSV
 
       def keep_end
         start = @keeps.pop
-        string[start, pos - start]
+        string.byteslice(start, pos - start)
       end
 
       def keep_back
@@ -50,6 +71,18 @@ class CSV
       end
     end
 
+    #
+    # CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
+    # It also controls the life cycle of the object with its methods +keep_start+,
+    # +keep_end+, +keep_back+, +keep_drop+.
+    #
+    # CSV::InputsScanner.scan() tries to match with pattern at the current position.
+    # If there's a match, the scanner advances the “scan pointer” and returns the matched string.
+    # Otherwise, the scanner returns nil.
+    #
+    # CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
+    # If there is no more data (eos? = true), it returns "".
+    #
     class InputsScanner
       def initialize(inputs, encoding, chunk_size: 8192)
         @inputs = inputs.dup
@@ -137,7 +170,7 @@ class CSV
 
       def keep_end
         start, buffer = @keeps.pop
-        keep = @scanner.string[start, @scanner.pos - start]
+        keep = @scanner.string.byteslice(start, @scanner.pos - start)
         if buffer
           buffer << keep
           keep = buffer
@@ -192,7 +225,7 @@ class CSV
         input = @inputs.first
         case input
         when StringIO
-          string = input.string
+          string = input.read
           raise InvalidEncoding unless string.valid_encoding?
           @scanner = StringScanner.new(string)
           @inputs.shift
@@ -319,6 +352,7 @@ class CSV
     end
 
     private
+    # A set of tasks to prepare the file in order to parse it
     def prepare
       prepare_variable
       prepare_quote_character
@@ -447,7 +481,13 @@ class CSV
     end
 
     def prepare_separators
-      @column_separator = @options[:column_separator].to_s.encode(@encoding)
+      column_separator = @options[:column_separator]
+      @column_separator = column_separator.to_s.encode(@encoding)
+      if @column_separator.size < 1
+        message = ":col_sep must be 1 or more characters: "
+        message += column_separator.inspect
+        raise ArgumentError, message
+      end
       @row_separator =
         resolve_row_separator(@options[:row_separator]).encode(@encoding)
 
@@ -534,7 +574,9 @@ class CSV
         cr = "\r".encode(@encoding)
         lf = "\n".encode(@encoding)
         if @input.is_a?(StringIO)
-          separator = detect_row_separator(@input.string, cr, lf)
+          pos = @input.pos
+          separator = detect_row_separator(@input.read, cr, lf)
+          @input.seek(pos)
         elsif @input.respond_to?(:gets)
           if @input.is_a?(File)
             chunk_size = 32 * 1024
@@ -651,7 +693,9 @@ class CSV
       return false if @quote_character.nil?
 
       if @input.is_a?(StringIO)
-        sample = @input.string
+        pos = @input.pos
+        sample = @input.read
+        @input.seek(pos)
       else
         return false if @samples.empty?
         sample = @samples.first
@@ -684,7 +728,7 @@ class CSV
           UnoptimizedStringIO.new(sample)
         end
         if @input.is_a?(StringIO)
-          inputs << UnoptimizedStringIO.new(@input.string)
+          inputs << UnoptimizedStringIO.new(@input.read)
         else
           inputs << @input
         end
@@ -697,7 +741,7 @@ class CSV
       def build_scanner
         string = nil
         if @samples.empty? and @input.is_a?(StringIO)
-          string = @input.string
+          string = @input.read
         elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof?
           string = @samples[0]
         end
diff --git a/lib/csv/row.rb b/lib/csv/row.rb
index 1e1f27587b..4aa0f30911 100644
--- a/lib/csv/row.rb
+++ b/lib/csv/row.rb
@@ -4,7 +4,7 @@ require "forwardable"
 
 class CSV
   #
-  # A CSV::Row is part Array and part Hash.  It retains an order for the fields
+  # A CSV::Row is part Array and part Hash. It retains an order for the fields
   # and allows duplicates just as an Array would, but also allows you to access
   # fields by name just as you could if they were in a Hash.
   #
@@ -13,13 +13,13 @@ class CSV
   #
   class Row
     #
-    # Construct a new CSV::Row from +headers+ and +fields+, which are expected
-    # to be Arrays.  If one Array is shorter than the other, it will be padded
+    # Constructs a new CSV::Row from +headers+ and +fields+, which are expected
+    # to be Arrays. If one Array is shorter than the other, it will be padded
     # with +nil+ objects.
     #
     # The optional +header_row+ parameter can be set to +true+ to indicate, via
     # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
-    # row.  Otherwise, the row is assumes to be a field row.
+    # row. Otherwise, the row is assumed to be a field row.
     #
     # A CSV::Row object supports the following Array methods through delegation:
     #
@@ -74,11 +74,11 @@ class CSV
     #   field( header, offset )
     #   field( index )
     #
-    # This method will return the field value by +header+ or +index+.  If a field
+    # This method will return the field value by +header+ or +index+. If a field
     # is not found, +nil+ is returned.
     #
     # When provided, +offset+ ensures that a header match occurs on or later
-    # than the +offset+ index.  You can use this to find duplicate headers,
+    # than the +offset+ index. You can use this to find duplicate headers,
     # without resorting to hard-coding exact indices.
     #
     def field(header_or_index, minimum_index = 0)
@@ -142,7 +142,7 @@ class CSV
     # assigns the +value+.
     #
     # Assigning past the end of the row with an index will set all pairs between
-    # to <tt>[nil, nil]</tt>.  Assigning to an unused header appends the new
+    # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
     # pair.
     #
     def []=(*args)
@@ -172,8 +172,8 @@ class CSV
     #   <<( header_and_field_hash )
     #
     # If a two-element Array is provided, it is assumed to be a header and field
-    # and the pair is appended.  A Hash works the same way with the key being
-    # the header and the value being the field.  Anything else is assumed to be
+    # and the pair is appended. A Hash works the same way with the key being
+    # the header and the value being the field. Anything else is assumed to be
     # a lone field which is appended with a +nil+ header.
     #
     # This method returns the row for chaining.
@@ -191,7 +191,7 @@ class CSV
     end
 
     #
-    # A shortcut for appending multiple fields.  Equivalent to:
+    # A shortcut for appending multiple fields. Equivalent to:
     #
     #   args.each { |arg| csv_row << arg }
     #
@@ -209,8 +209,8 @@ class CSV
     #   delete( header, offset )
     #   delete( index )
     #
-    # Used to remove a pair from the row by +header+ or +index+.  The pair is
-    # located as described in CSV::Row.field().  The deleted pair is returned,
+    # Removes a pair from the row by +header+ or +index+. The pair is
+    # located as described in CSV::Row.field(). The deleted pair is returned,
     # or +nil+ if a pair could not be found.
     #
     def delete(header_or_index, minimum_index = 0)
@@ -325,7 +325,7 @@ class CSV
     end
 
     #
-    # Collapses the row into a simple Hash.  Be warned that this discards field
+    # Collapses the row into a simple Hash. Be warned that this discards field
     # order and clobbers duplicate fields.
     #
     def to_h
@@ -340,7 +340,7 @@ class CSV
     alias_method :to_ary, :to_a
 
     #
-    # Returns the row as a CSV String.  Headers are not used.  Equivalent to:
+    # Returns the row as a CSV String. Headers are not used. Equivalent to:
     #
     #   csv_row.fields.to_csv( options )
     #
@@ -367,7 +367,9 @@ class CSV
       end
     end
 
+    #
     # A summary of fields, by header, in an ASCII compatible String.
+    #
     def inspect
       str = ["#<", self.class.to_s]
       each do |header, field|
diff --git a/lib/csv/table.rb b/lib/csv/table.rb
index 29b188a6d7..e6c1ee11fa 100644
--- a/lib/csv/table.rb
+++ b/lib/csv/table.rb
@@ -5,7 +5,7 @@ require "forwardable"
 class CSV
   #
   # A CSV::Table is a two-dimensional data structure for representing CSV
-  # documents.  Tables allow you to work with the data by row or column,
+  # documents. Tables allow you to work with the data by row or column,
   # manipulate the data, and even convert the results back to CSV, if needed.
   #
   # All tables returned by CSV will be constructed from this class, if header
@@ -13,8 +13,8 @@ class CSV
   #
   class Table
     #
-    # Construct a new CSV::Table from +array_of_rows+, which are expected
-    # to be CSV::Row objects.  All rows are assumed to have the same headers.
+    # Constructs a new CSV::Table from +array_of_rows+, which are expected
+    # to be CSV::Row objects. All rows are assumed to have the same headers.
     #
     # The optional +headers+ parameter can be set to Array of headers.
     # If headers aren't set, headers are fetched from CSV::Row objects.
@@ -55,11 +55,11 @@ class CSV
     def_delegators :@table, :empty?, :length, :size
 
     #
-    # Returns a duplicate table object, in column mode.  This is handy for
+    # Returns a duplicate table object, in column mode. This is handy for
     # chaining in a single call without changing the table mode, but be aware
     # that this method can consume a fair amount of memory for bigger data sets.
     #
-    # This method returns the duplicate table for chaining.  Don't chain
+    # This method returns the duplicate table for chaining. Don't chain
     # destructive methods (like []=()) this way though, since you are working
     # with a duplicate.
     #
@@ -68,7 +68,7 @@ class CSV
     end
 
     #
-    # Switches the mode of this table to column mode.  All calls to indexing and
+    # Switches the mode of this table to column mode. All calls to indexing and
     # iteration methods will work with columns until the mode is changed again.
     #
     # This method returns the table and is safe to chain.
@@ -80,7 +80,7 @@ class CSV
     end
 
     #
-    # Returns a duplicate table object, in mixed mode.  This is handy for
+    # Returns a duplicate table object, in mixed mode. This is handy for
     # chaining in a single call without changing the table mode, but be aware
     # that this method can consume a fair amount of memory for bigger data sets.
     #
@@ -93,9 +93,9 @@ class CSV
     end
 
     #
-    # Switches the mode of this table to mixed mode.  All calls to indexing and
+    # Switches the mode of this table to mixed mode. All calls to indexing and
     # iteration methods will use the default intelligent indexing system until
-    # the mode is changed again.  In mixed mode an index is assumed to be a row
+    # the mode is changed again. In mixed mode an index is assumed to be a row
     # reference while anything else is assumed to be column access by headers.
     #
     # This method returns the table and is safe to chain.
@@ -120,7 +120,7 @@ class CSV
     end
 
     #
-    # Switches the mode of this table to row mode.  All calls to indexing and
+    # Switches the mode of this table to row mode. All calls to indexing and
     # iteration methods will work with rows until the mode is changed again.
     #
     # This method returns the table and is safe to chain.
@@ -146,7 +146,7 @@ class CSV
 
     #
     # In the default mixed mode, this method returns rows for index access and
-    # columns for header access.  You can force the index association by first
+    # columns for header access. You can force the index association by first
     # calling by_col!() or by_row!().
     #
     # Columns are returned as an Array of values.  Altering that Array has no
@@ -163,18 +163,18 @@ class CSV
 
     #
     # In the default mixed mode, this method assigns rows for index access and
-    # columns for header access.  You can force the index association by first
+    # columns for header access. You can force the index association by first
     # calling by_col!() or by_row!().
     #
     # Rows may be set to an Array of values (which will inherit the table's
     # headers()) or a CSV::Row.
     #
     # Columns may be set to a single value, which is copied to each row of the
-    # column, or an Array of values.  Arrays of values are assigned to rows top
-    # to bottom in row major order.  Excess values are ignored and if the Array
+    # column, or an Array of values. Arrays of values are assigned to rows top
+    # to bottom in row major order. Excess values are ignored and if the Array
     # does not have a value for each row the extra rows will receive a +nil+.
     #
-    # Assigning to an existing column or row clobbers the data.  Assigning to
+    # Assigning to an existing column or row clobbers the data. Assigning to
     # new columns creates them at the right end of the table.
     #
     def []=(index_or_header, value)
@@ -212,9 +212,9 @@ class CSV
 
     #
     # The mixed mode default is to treat a list of indices as row access,
-    # returning the rows indicated.  Anything else is considered columnar
-    # access.  For columnar access, the return set has an Array for each row
-    # with the values indicated by the headers in each Array.  You can force
+    # returning the rows indicated. Anything else is considered columnar
+    # access. For columnar access, the return set has an Array for each row
+    # with the values indicated by the headers in each Array. You can force
     # column or row mode using by_col!() or by_row!().
     #
     # You cannot mix column and row access.
@@ -234,7 +234,7 @@ class CSV
     end
 
     #
-    # Adds a new row to the bottom end of this table.  You can provide an Array,
+    # Adds a new row to the bottom end of this table. You can provide an Array,
     # which will be converted to a CSV::Row (inheriting the table's headers()),
     # or a CSV::Row.
     #
@@ -251,7 +251,7 @@ class CSV
     end
 
     #
-    # A shortcut for appending multiple rows.  Equivalent to:
+    # A shortcut for appending multiple rows. Equivalent to:
     #
     #   rows.each { |row| self << row }
     #
@@ -264,9 +264,9 @@ class CSV
     end
 
     #
-    # Removes and returns the indicated columns or rows.  In the default mixed
+    # Removes and returns the indicated columns or rows. In the default mixed
     # mode indices refer to rows and everything else is assumed to be a column
-    # headers.  Use by_col!() or by_row!() to force the lookup.
+    # headers. Use by_col!() or by_row!() to force the lookup.
     #
     def delete(*indexes_or_headers)
       if indexes_or_headers.empty?
@@ -293,9 +293,9 @@ class CSV
     end
 
     #
-    # Removes any column or row for which the block returns +true+.  In the
+    # Removes any column or row for which the block returns +true+. In the
     # default mixed mode or row mode, iteration is the standard row major
-    # walking of rows.  In column mode, iteration will +yield+ two element
+    # walking of rows. In column mode, iteration will +yield+ two element
     # tuples containing the column name and an Array of values for that column.
     #
     # This method returns the table for chaining.
@@ -321,7 +321,7 @@ class CSV
 
     #
     # In the default mixed mode or row mode, iteration is the standard row major
-    # walking of rows.  In column mode, iteration will +yield+ two element
+    # walking of rows. In column mode, iteration will +yield+ two element
     # tuples containing the column name and an Array of values for that column.
     #
     # This method returns the table for chaining.
@@ -347,7 +347,7 @@ class CSV
     end
 
     #
-    # Returns the table as an Array of Arrays.  Headers will be the first row,
+    # Returns the table as an Array of Arrays. Headers will be the first row,
     # then all of the field rows will follow.
     #
     def to_a
@@ -360,7 +360,7 @@ class CSV
     end
 
     #
-    # Returns the table as a complete CSV String.  Headers will be listed first,
+    # Returns the table as a complete CSV String. Headers will be listed first,
     # then all of the field rows.
     #
     # This method assumes you want the Table.headers(), unless you explicitly
diff --git a/lib/csv/version.rb b/lib/csv/version.rb
index ce55373f02..072400fe01 100644
--- a/lib/csv/version.rb
+++ b/lib/csv/version.rb
@@ -2,5 +2,5 @@
 
 class CSV
   # The version of the installed library.
-  VERSION = "3.1.1"
+  VERSION = "3.1.2"
 end
diff --git a/lib/csv/writer.rb b/lib/csv/writer.rb
index 1682ac03ea..9243d23641 100644
--- a/lib/csv/writer.rb
+++ b/lib/csv/writer.rb
@@ -6,7 +6,12 @@ require_relative "row"
 using CSV::MatchP if CSV.const_defined?(:MatchP)
 
 class CSV
+  # Note: Don't use this class directly. This is an internal class.
   class Writer
+    #
+    # A CSV::Writer receives an output, prepares the header, format and output.
+    # It allows us to write new rows in the object and rewind it.
+    #
     attr_reader :lineno
     attr_reader :headers
 
@@ -22,6 +27,9 @@ class CSV
       @fields_converter = @options[:fields_converter]
     end
 
+    #
+    # Adds a new row
+    #
     def <<(row)
       case row
       when Row
@@ -47,6 +55,9 @@ class CSV
       self
     end
 
+    #
+    # Winds back to the beginning
+    #
     def rewind
       @lineno = 0
       @headers = nil if @options[:headers].nil?
author	Sutou Kouhei <kou@cozmixng.org>	2019-10-12 14:03:21 +0900
committer	GitHub <noreply@github.com>	2019-10-12 14:03:21 +0900
commit	92df7d98b62f48cf21cdec522f2e7b34380fd718 (patch)
tree	a0d169e177ebd5607caefa26cef90cc70df48232 /lib/csv
parent	d6e68bb263e79cb802fa683d9c4139ddca2fd4f5 (diff)