diff options
Diffstat (limited to 'lib/csv/table.rb')
-rw-r--r-- | lib/csv/table.rb | 621 |
1 files changed, 0 insertions, 621 deletions
diff --git a/lib/csv/table.rb b/lib/csv/table.rb deleted file mode 100644 index 1ce0dd6daf..0000000000 --- a/lib/csv/table.rb +++ /dev/null @@ -1,621 +0,0 @@ -# frozen_string_literal: true - -require "forwardable" - -class CSV - # - # A CSV::Table is a two-dimensional data structure for representing CSV - # documents. Tables allow you to work with the data by row or column, - # manipulate the data, and even convert the results back to CSV, if needed. - # - # All tables returned by CSV will be constructed from this class, if header - # row processing is activated. - # - class Table - # - # Constructs a new CSV::Table from +array_of_rows+, which are expected - # to be CSV::Row objects. All rows are assumed to have the same headers. - # - # The optional +headers+ parameter can be set to Array of headers. - # If headers aren't set, headers are fetched from CSV::Row objects. - # Otherwise, headers() method will return headers being set in - # headers argument. - # - # A CSV::Table object supports the following Array methods through - # delegation: - # - # * empty?() - # * length() - # * size() - # - def initialize(array_of_rows, headers: nil) - @table = array_of_rows - @headers = headers - unless @headers - if @table.empty? - @headers = [] - else - @headers = @table.first.headers - end - end - - @mode = :col_or_row - end - - # The current access mode for indexing and iteration. - attr_reader :mode - - # Internal data format used to compare equality. - attr_reader :table - protected :table - - ### Array Delegation ### - - extend Forwardable - def_delegators :@table, :empty?, :length, :size - - # - # Returns a duplicate table object, in column mode. This is handy for - # chaining in a single call without changing the table mode, but be aware - # that this method can consume a fair amount of memory for bigger data sets. - # - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. - # - def by_col - self.class.new(@table.dup).by_col! - end - - # - # Switches the mode of this table to column mode. All calls to indexing and - # iteration methods will work with columns until the mode is changed again. - # - # This method returns the table and is safe to chain. - # - def by_col! - @mode = :col - - self - end - - # - # Returns a duplicate table object, in mixed mode. This is handy for - # chaining in a single call without changing the table mode, but be aware - # that this method can consume a fair amount of memory for bigger data sets. - # - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. - # - def by_col_or_row - self.class.new(@table.dup).by_col_or_row! - end - - # - # Switches the mode of this table to mixed mode. All calls to indexing and - # iteration methods will use the default intelligent indexing system until - # the mode is changed again. In mixed mode an index is assumed to be a row - # reference while anything else is assumed to be column access by headers. - # - # This method returns the table and is safe to chain. - # - def by_col_or_row! - @mode = :col_or_row - - self - end - - # - # Returns a duplicate table object, in row mode. This is handy for chaining - # in a single call without changing the table mode, but be aware that this - # method can consume a fair amount of memory for bigger data sets. - # - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. - # - def by_row - self.class.new(@table.dup).by_row! - end - - # - # Switches the mode of this table to row mode. All calls to indexing and - # iteration methods will work with rows until the mode is changed again. - # - # This method returns the table and is safe to chain. - # - def by_row! - @mode = :row - - self - end - - # - # Returns the headers for the first row of this table (assumed to match all - # other rows). The headers Array passed to CSV::Table.new is returned for - # empty tables. - # - def headers - if @table.empty? - @headers.dup - else - @table.first.headers - end - end - - # :call-seq: - # table[n] -> row - # table[range] -> array_of_rows - # table[header] -> array_of_fields - # - # Returns data from the table; does not modify the table. - # - # --- - # - # The expression <tt>table[n]</tt>, where +n+ is a non-negative \Integer, - # returns the +n+th row of the table, if that row exists, - # and if the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>: - # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" - # table = CSV.parse(source, headers: true) - # table.by_row! # => #<CSV::Table mode:row row_count:4> - # table[1] # => #<CSV::Row "Name":"bar" "Value":"1"> - # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4> - # table[1] # => #<CSV::Row "Name":"bar" "Value":"1"> - # - # Counts backward from the last row if +n+ is negative: - # table[-1] # => #<CSV::Row "Name":"baz" "Value":"2"> - # - # Returns +nil+ if +n+ is too large or too small: - # table[4] # => nil - # table[-4] => nil - # - # Raises an exception if the access mode is <tt>:row</tt> - # and +n+ is not an - # {Integer-convertible object}[https://docs.ruby-lang.org/en/master/implicit_conversion_rdoc.html#label-Integer-Convertible+Objects]. - # table.by_row! # => #<CSV::Table mode:row row_count:4> - # # Raises TypeError (no implicit conversion of String into Integer): - # table['Name'] - # - # --- - # - # The expression <tt>table[range]</tt>, where +range+ is a Range object, - # returns rows from the table, beginning at row <tt>range.first</tt>, - # if those rows exist, and if the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>: - # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" - # table = CSV.parse(source, headers: true) - # table.by_row! # => #<CSV::Table mode:row row_count:4> - # rows = table[1..2] # => #<CSV::Row "Name":"bar" "Value":"1"> - # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] - # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4> - # rows = table[1..2] # => #<CSV::Row "Name":"bar" "Value":"1"> - # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] - # - # If there are too few rows, returns all from <tt>range.first</tt> to the end: - # rows = table[1..50] # => #<CSV::Row "Name":"bar" "Value":"1"> - # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] - # - # Special case: if <tt>range.start == table.size</tt>, returns an empty \Array: - # table[table.size..50] # => [] - # - # If <tt>range.end</tt> is negative, calculates the ending index from the end: - # rows = table[0..-1] - # rows # => [#<CSV::Row "Name":"foo" "Value":"0">, #<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] - # - # If <tt>range.start</tt> is negative, calculates the starting index from the end: - # rows = table[-1..2] - # rows # => [#<CSV::Row "Name":"baz" "Value":"2">] - # - # If <tt>range.start</tt> is larger than <tt>table.size</tt>, returns +nil+: - # table[4..4] # => nil - # - # --- - # - # The expression <tt>table[header]</tt>, where +header+ is a \String, - # returns column values (\Array of \Strings) if the column exists - # and if the access mode is <tt>:col</tt> or <tt>:col_or_row</tt>: - # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" - # table = CSV.parse(source, headers: true) - # table.by_col! # => #<CSV::Table mode:col row_count:4> - # table['Name'] # => ["foo", "bar", "baz"] - # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4> - # col = table['Name'] - # col # => ["foo", "bar", "baz"] - # - # Modifying the returned column values does not modify the table: - # col[0] = 'bat' - # col # => ["bat", "bar", "baz"] - # table['Name'] # => ["foo", "bar", "baz"] - # - # Returns an \Array of +nil+ values if there is no such column: - # table['Nosuch'] # => [nil, nil, nil] - def [](index_or_header) - if @mode == :row or # by index - (@mode == :col_or_row and (index_or_header.is_a?(Integer) or index_or_header.is_a?(Range))) - @table[index_or_header] - else # by header - @table.map { |row| row[index_or_header] } - end - end - - # - # In the default mixed mode, this method assigns rows for index access and - # columns for header access. You can force the index association by first - # calling by_col!() or by_row!(). - # - # Rows may be set to an Array of values (which will inherit the table's - # headers()) or a CSV::Row. - # - # Columns may be set to a single value, which is copied to each row of the - # column, or an Array of values. Arrays of values are assigned to rows top - # to bottom in row major order. Excess values are ignored and if the Array - # does not have a value for each row the extra rows will receive a +nil+. - # - # Assigning to an existing column or row clobbers the data. Assigning to - # new columns creates them at the right end of the table. - # - def []=(index_or_header, value) - if @mode == :row or # by index - (@mode == :col_or_row and index_or_header.is_a? Integer) - if value.is_a? Array - @table[index_or_header] = Row.new(headers, value) - else - @table[index_or_header] = value - end - else # set column - unless index_or_header.is_a? Integer - index = @headers.index(index_or_header) || @headers.size - @headers[index] = index_or_header - end - if value.is_a? Array # multiple values - @table.each_with_index do |row, i| - if row.header_row? - row[index_or_header] = index_or_header - else - row[index_or_header] = value[i] - end - end - else # repeated value - @table.each do |row| - if row.header_row? - row[index_or_header] = index_or_header - else - row[index_or_header] = value - end - end - end - end - end - - # :call-seq: - # table.values_at(*indexes) -> array_of_rows - # table.values_at(*headers) -> array_of_columns_data - # - # If the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>, - # and each argument is either an \Integer or a \Range, - # returns rows. - # Otherwise, returns columns data. - # - # In either case, the returned values are in the order - # specified by the arguments. Arguments may be repeated. - # - # --- - # - # Returns rows as an \Array of \CSV::Row objects. - # - # No argument: - # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" - # table = CSV.parse(source, headers: true) - # table.values_at # => [] - # - # One index: - # values = table.values_at(0) - # values # => [#<CSV::Row "Name":"foo" "Value":"0">] - # - # Two indexes: - # values = table.values_at(2, 0) - # values # => [#<CSV::Row "Name":"baz" "Value":"2">, #<CSV::Row "Name":"foo" "Value":"0">] - # - # One \Range: - # values = table.values_at(1..2) - # values # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] - # - # \Ranges and indexes: - # values = table.values_at(0..1, 1..2, 0, 2) - # pp values - # Output: - # [#<CSV::Row "Name":"foo" "Value":"0">, - # #<CSV::Row "Name":"bar" "Value":"1">, - # #<CSV::Row "Name":"bar" "Value":"1">, - # #<CSV::Row "Name":"baz" "Value":"2">, - # #<CSV::Row "Name":"foo" "Value":"0">, - # #<CSV::Row "Name":"baz" "Value":"2">] - # - # --- - # - # Returns columns data as row Arrays, - # each consisting of the specified columns data for that row: - # values = table.values_at('Name') - # values # => [["foo"], ["bar"], ["baz"]] - # values = table.values_at('Value', 'Name') - # values # => [["0", "foo"], ["1", "bar"], ["2", "baz"]] - def values_at(*indices_or_headers) - if @mode == :row or # by indices - ( @mode == :col_or_row and indices_or_headers.all? do |index| - index.is_a?(Integer) or - ( index.is_a?(Range) and - index.first.is_a?(Integer) and - index.last.is_a?(Integer) ) - end ) - @table.values_at(*indices_or_headers) - else # by headers - @table.map { |row| row.values_at(*indices_or_headers) } - end - end - - # :call-seq: - # table << row_or_array -> self - # - # If +row_or_array+ is a \CSV::Row object, - # it is appended to the table: - # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" - # table = CSV.parse(source, headers: true) - # table << CSV::Row.new(table.headers, ['bat', 3]) - # table[3] # => #<CSV::Row "Name":"bat" "Value":3> - # - # If +row_or_array+ is an \Array, it is used to create a new - # \CSV::Row object which is then appended to the table: - # table << ['bam', 4] - # table[4] # => #<CSV::Row "Name":"bam" "Value":4> - def <<(row_or_array) - if row_or_array.is_a? Array # append Array - @table << Row.new(headers, row_or_array) - else # append Row - @table << row_or_array - end - - self # for chaining - end - - # - # :call-seq: - # table.push(*rows_or_arrays) -> self - # - # A shortcut for appending multiple rows. Equivalent to: - # rows.each {|row| self << row } - # - # Each argument may be either a \CSV::Row object or an \Array: - # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" - # table = CSV.parse(source, headers: true) - # rows = [ - # CSV::Row.new(table.headers, ['bat', 3]), - # ['bam', 4] - # ] - # table.push(*rows) - # table[3..4] # => [#<CSV::Row "Name":"bat" "Value":3>, #<CSV::Row "Name":"bam" "Value":4>] - def push(*rows) - rows.each { |row| self << row } - - self # for chaining - end - - # :call-seq: - # table.delete(*indexes) -> deleted_values - # table.delete(*headers) -> deleted_values - # - # If the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>, - # and each argument is either an \Integer or a \Range, - # returns deleted rows. - # Otherwise, returns deleted columns data. - # - # In either case, the returned values are in the order - # specified by the arguments. Arguments may be repeated. - # - # --- - # - # Returns rows as an \Array of \CSV::Row objects. - # - # One index: - # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" - # table = CSV.parse(source, headers: true) - # deleted_values = table.delete(0) - # deleted_values # => [#<CSV::Row "Name":"foo" "Value":"0">] - # - # Two indexes: - # table = CSV.parse(source, headers: true) - # deleted_values = table.delete(2, 0) - # deleted_values # => [#<CSV::Row "Name":"baz" "Value":"2">, #<CSV::Row "Name":"foo" "Value":"0">] - # - # --- - # - # Returns columns data as column Arrays. - # - # One header: - # table = CSV.parse(source, headers: true) - # deleted_values = table.delete('Name') - # deleted_values # => ["foo", "bar", "baz"] - # - # Two headers: - # table = CSV.parse(source, headers: true) - # deleted_values = table.delete('Value', 'Name') - # deleted_values # => [["0", "1", "2"], ["foo", "bar", "baz"]] - def delete(*indexes_or_headers) - if indexes_or_headers.empty? - raise ArgumentError, "wrong number of arguments (given 0, expected 1+)" - end - deleted_values = indexes_or_headers.map do |index_or_header| - if @mode == :row or # by index - (@mode == :col_or_row and index_or_header.is_a? Integer) - @table.delete_at(index_or_header) - else # by header - if index_or_header.is_a? Integer - @headers.delete_at(index_or_header) - else - @headers.delete(index_or_header) - end - @table.map { |row| row.delete(index_or_header).last } - end - end - if indexes_or_headers.size == 1 - deleted_values[0] - else - deleted_values - end - end - - # Removes rows or columns for which the block returns a truthy value; - # returns +self+. - # - # Removes rows when the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>; - # calls the block with each \CSV::Row object: - # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" - # table = CSV.parse(source, headers: true) - # table.by_row! # => #<CSV::Table mode:row row_count:4> - # table.size # => 3 - # table.delete_if {|row| row['Name'].start_with?('b') } - # table.size # => 1 - # - # Removes columns when the access mode is <tt>:col</tt>; - # calls the block with each column as a 2-element array - # containing the header and an \Array of column fields: - # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" - # table = CSV.parse(source, headers: true) - # table.by_col! # => #<CSV::Table mode:col row_count:4> - # table.headers.size # => 2 - # table.delete_if {|column_data| column_data[1].include?('2') } - # table.headers.size # => 1 - # - # Returns a new \Enumerator if no block is given: - # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" - # table = CSV.parse(source, headers: true) - # table.delete_if # => #<Enumerator: #<CSV::Table mode:col_or_row row_count:4>:delete_if> - def delete_if(&block) - return enum_for(__method__) { @mode == :row or @mode == :col_or_row ? size : headers.size } unless block_given? - - if @mode == :row or @mode == :col_or_row # by index - @table.delete_if(&block) - else # by header - deleted = [] - headers.each do |header| - deleted << delete(header) if yield([header, self[header]]) - end - end - - self # for chaining - end - - include Enumerable - - # Calls the block with each row or column; returns +self+. - # - # When the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>, - # calls the block with each \CSV::Row object: - # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" - # table = CSV.parse(source, headers: true) - # table.by_row! # => #<CSV::Table mode:row row_count:4> - # table.each {|row| p row } - # Output: - # #<CSV::Row "Name":"foo" "Value":"0"> - # #<CSV::Row "Name":"bar" "Value":"1"> - # #<CSV::Row "Name":"baz" "Value":"2"> - # - # When the access mode is <tt>:col</tt>, - # calls the block with each column as a 2-element array - # containing the header and an \Array of column fields: - # table.by_col! # => #<CSV::Table mode:col row_count:4> - # table.each {|column_data| p column_data } - # Output: - # ["Name", ["foo", "bar", "baz"]] - # ["Value", ["0", "1", "2"]] - # - # Returns a new \Enumerator if no block is given: - # table.each # => #<Enumerator: #<CSV::Table mode:col row_count:4>:each> - def each(&block) - return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given? - - if @mode == :col - headers.each { |header| yield([header, self[header]]) } - else - @table.each(&block) - end - - self # for chaining - end - - # Returns +true+ if all each row of +self+ <tt>==</tt> - # the corresponding row of +other_table+, otherwise, +false+. - # - # The access mode does no affect the result. - # - # Equal tables: - # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" - # table = CSV.parse(source, headers: true) - # other_table = CSV.parse(source, headers: true) - # table == other_table # => true - # - # Different row count: - # other_table.delete(2) - # table == other_table # => false - # - # Different last row: - # other_table << ['bat', 3] - # table == other_table # => false - def ==(other) - return @table == other.table if other.is_a? CSV::Table - @table == other - end - - # - # Returns the table as an Array of Arrays. Headers will be the first row, - # then all of the field rows will follow. - # - def to_a - array = [headers] - @table.each do |row| - array.push(row.fields) unless row.header_row? - end - - array - end - - # - # Returns the table as a complete CSV String. Headers will be listed first, - # then all of the field rows. - # - # This method assumes you want the Table.headers(), unless you explicitly - # pass <tt>:write_headers => false</tt>. - # - def to_csv(write_headers: true, **options) - array = write_headers ? [headers.to_csv(**options)] : [] - @table.each do |row| - array.push(row.fields.to_csv(**options)) unless row.header_row? - end - - array.join("") - end - alias_method :to_s, :to_csv - - # - # Extracts the nested value specified by the sequence of +index+ or +header+ objects by calling dig at each step, - # returning nil if any intermediate step is nil. - # - def dig(index_or_header, *index_or_headers) - value = self[index_or_header] - if value.nil? - nil - elsif index_or_headers.empty? - value - else - unless value.respond_to?(:dig) - raise TypeError, "#{value.class} does not have \#dig method" - end - value.dig(*index_or_headers) - end - end - - # Shows the mode and size of this table in a US-ASCII String. - def inspect - "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII") - end - end -end |