summaryrefslogtreecommitdiff
path: root/lib/csv.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/csv.rb')
-rw-r--r--lib/csv.rb1202
1 files changed, 427 insertions, 775 deletions
diff --git a/lib/csv.rb b/lib/csv.rb
index cfc5489ea0..205c3385c4 100644
--- a/lib/csv.rb
+++ b/lib/csv.rb
@@ -1,59 +1,32 @@
# CSV -- module for generating/parsing CSV data.
-
+
# $Id$
-
-# This module is copyrighted free software by NAKAMURA, Hiroshi.
-# You can redistribute it and/or modify it under the same term as Ruby.
-
-
+
+# This program is copyrighted free software by NAKAMURA, Hiroshi. You can
+# redistribute it and/or modify it under the same terms of Ruby's license;
+# either the dual license version in 2003, or any later version.
+
+
class CSV
-public
- # DESCRIPTION
- # CSV::Cell -- Describes 1 cell of CSV.
- #
+ # Describes a cell of CSV.
class Cell
- public
-
# Datum as string.
attr_accessor :data
-
- # Is this datum null?
+
+ # Is this datum NULL?
attr_accessor :is_null
- # SYNOPSIS
- # cell = CSV::Cell.new(data = '', is_null = true)
- #
- # ARGS
- # data: datum as String
- # is_null: is this datum null?
- #
- # RETURNS
- # cell: Created instance.
- #
- # DESCRIPTION
- # Create instance. If is_null is true, datum is stored in the instance
- # created but it should be treated as 'NULL'.
- #
+ # If is_null is true, datum is stored in the instance created but it
+ # should be treated as 'NULL'.
def initialize(data = '', is_null = true)
@data = data
@is_null = is_null
end
- # SYNOPSIS
- # CSV::Cell#match(rhs)
- #
- # ARGS
- # rhs: an instance of CSV::Cell to be compared.
- #
- # RETURNS
- # true/false. See the souce if you want to know matching algorithm.
- #
- # DESCRIPTION
- # Compare another cell with me. Bare in mind Null matches with Null
- # using this method. Use CSV::Cell#== if you want Null never matches
- # with other data including Null.
- #
+ # Compares another cell with self. Bear in mind NULL matches with NULL.
+ # Use CSV::Cell#== if you don't want NULL to match NULL.
+ # rhs: an instance of CSV::Cell to be compared.
def match(rhs)
if @is_null and rhs.is_null
true
@@ -64,20 +37,9 @@ public
end
end
- # SYNOPSIS
- # CSV::Cell#==(rhs)
- #
- # ARGS
- # rhs: an instance of CSV::Cell to be compared.
- #
- # RETURNS
- # true/false. See the souce if you want to know matching algorithm.
- #
- # DESCRIPTION
- # Compare another cell with me. Bare in mind Null is not match with
- # Null using this method. Null never matches with other data including
- # Null. Use CSV::Cell#match if you want Null matches with Null.
- #
+ # Compares another cell with self. Bear in mind NULL does not match with
+ # NULL. Use CSV::Cell#match if you want NULL to match NULL.
+ # rhs: an instance of CSV::Cell to be compared.
def ==(rhs)
if @is_null or rhs.is_null
false
@@ -100,39 +62,18 @@ public
@is_null ? nil : data
end
end
-
- # DESCRIPTION
- # CSV::Row -- Describes a row of CSV. Each element must be a CSV::Cell.
- #
+
+ # Describes a row of CSV. Each element must be a CSV::Cell.
class Row < Array
- public
-
- # SYNOPSIS
- # CSV::Row#to_a
- #
- # RETURNS
- # An Array of String.
- #
- # DESCRIPTION
- # Convert CSV::Cell to String. Null is converted to nil.
- #
+
+ # Returns the strings contained in the row's cells.
def to_a
self.collect { |cell| cell.is_null ? nil : cell.data }
end
- # SYNOPSIS
- # CSV::Row#match(rhs)
- #
- # ARGS
- # rhs: an Array of cells. Each cell is a instance of CSV::Cell.
- #
- # RETURNS
- # true/false. See the souce if you want to know matching algorithm.
- #
- # DESCRIPTION
- # Compare another row with me.
- #
+ # Compares another row with self.
+ # rhs: an Array of cells. Each cell should be a CSV::Cell.
def match(rhs)
if self.size != rhs.size
return false
@@ -147,62 +88,9 @@ public
end
- # SYNOPSIS
- # 1. reader = CSV.open(filename, 'r')
- #
- # 2. CSV.open(filename, 'r') do |row|
- # ...
- # end
- #
- # 3. writer = CSV.open(filename, 'w')
- #
- # 4. CSV.open(filename, 'w') do |writer|
- # ...
- # end
- #
- # ARGS
- # filename: filename to open.
- # mode: 'r' for read (parse)
- # 'w' for write (generate)
- # row: an Array of cells which is a parsed line.
- # writer: Created writer instance. See CSV::Writer#<< and
- # CSV::Writer#add_row to know how to generate CSV string.
- #
- # RETURNS
- # reader: Create reader instance. To get parse result, see
- # CSV::Reader#each.
- # writer: Created writer instance. See CSV::Writer#<< and
- # CSV::Writer#add_row to know how to generate CSV string.
- #
- # DESCRIPTION
- # Open a CSV formatted file to read or write.
- #
- # EXAMPLE 1
- # reader = CSV.open('csvfile.csv', 'r')
- # row1 = reader.shift
- # row2 = reader.shift
- # if row2.empty?
- # p 'row2 not find.'
- # end
- # reader.close
- #
- # EXAMPLE 2
- # CSV.open('csvfile.csv', 'r') do |row|
- # p row
- # end
- #
- # EXAMPLE 3
- # writer = CSV.open('csvfile.csv', 'w')
- # writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
- # writer.close
- #
- # EXAMPLE 4
- # CSV.open('csvfile.csv', 'w') do |writer|
- # writer << ['r1c1', 'r1c2']
- # writer << ['r2c1', 'r2c2']
- # writer << [nil, nil]
- # end
- #
+ class IllegalFormatError < RuntimeError; end
+
+
def CSV.open(filename, mode, col_sep = ?,, row_sep = nil, &block)
if mode == 'r' or mode == 'rb'
open_reader(filename, col_sep, row_sep, &block)
@@ -213,530 +101,71 @@ public
end
end
- def CSV.parse(filename, col_sep = ?,, row_sep = nil, &block)
- open_reader(filename, col_sep, row_sep, &block)
- end
-
- def CSV.generate(filename, col_sep = ?,, row_sep = nil, &block)
- open_writer(filename, col_sep, row_sep, &block)
- end
-
- # Private class methods.
- class << self
- private
- def open_reader(filename, col_sep, row_sep, &block)
- file = File.open(filename, 'rb')
- if block
- begin
- CSV::Reader.parse(file, col_sep, row_sep) do |row|
- yield(row)
- end
- ensure
- file.close
- end
- nil
- else
- reader = CSV::Reader.create(file, col_sep, row_sep)
- reader.close_on_terminate
- reader
- end
- end
-
- def open_writer(filename, col_sep, row_sep, &block)
- file = File.open(filename, 'wb')
- if block
- begin
- CSV::Writer.generate(file, col_sep, row_sep) do |writer|
- yield(writer)
- end
- ensure
- file.close
- end
- nil
- else
- writer = CSV::Writer.create(file, col_sep, row_sep)
- writer.close_on_terminate
- writer
- end
- end
- end
-
-
- # DESCRIPTION
- # CSV::Reader -- CSV formatted string/stream reader.
- #
- # EXAMPLE
- # Read CSV lines untill the first column is 'stop'.
+ # Open a CSV formatted file for reading.
#
- # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
- # p row
- # break if !row[0].is_null && row[0].data == 'stop'
+ # EXAMPLE 1
+ # reader = CSV.parse('csvfile.csv')
+ # row1 = reader.shift
+ # row2 = reader.shift
+ # if row2.empty?
+ # p 'row2 not found.'
# end
+ # reader.close
#
- class Reader
- include Enumerable
- public
-
- # SYNOPSIS
- # reader = CSV::Reader.create(str_or_readable)
- #
- # ARGS
- # str_or_readable: a CSV data to be parsed. A String or an IO.
- #
- # RETURNS
- # reader: Created instance.
- #
- # DESCRIPTION
- # Create instance. To get parse result, see CSV::Reader#each.
- #
- def Reader.create(str_or_readable, col_sep = ?,, row_sep = nil)
- case str_or_readable
- when IO
- IOReader.new(str_or_readable, col_sep, row_sep)
- when String
- StringReader.new(str_or_readable, col_sep, row_sep)
- else
- IOReader.new(str_or_readable, col_sep, row_sep)
- end
- end
-
- # SYNOPSIS
- # CSV::Reader.parse(str_or_readable) do |row|
- # ...
- # end
- #
- # ARGS
- # str_or_readable: a CSV data to be parsed. A String or an IO.
- # row: a CSV::Row; an Array of a CSV::Cell in a line.
- #
- # RETURNS
- # nil
- #
- # DESCRIPTION
- # Parse CSV data and get lines. Caller block is called for each line
- # with an argument which is a chunk of cells in a row.
- #
- # Block value is always nil. Rows are not cached for performance
- # reason.
- #
- def Reader.parse(str_or_readable, col_sep = ?,, row_sep = nil)
- reader = create(str_or_readable, col_sep, row_sep)
- reader.each do |row|
- yield(row)
- end
- reader.close
- nil
- end
-
- # SYNOPSIS
- # CSV::Reader#each do |row|
- # ...
- # end
- #
- # ARGS
- # row: a CSV::Row; an Array of a CSV::Cell in a line.
- #
- # RETURNS
- # nil
- #
- # DESCRIPTION
- # Caller block is called for each line with an argument which is a chunk
- # of cells in a row.
- #
- # Block value is always nil. Rows are not cached for performance
- # reason.
- #
- def each
- while true
- row = Row.new
- parsed_cells = get_row(row)
- if parsed_cells == 0
- break
- end
- yield(row)
- end
- nil
- end
-
- # SYNOPSIS
- # cell = CSV::Reader#shift
- #
- # RETURNS
- # cell: a CSV::Row; an Array of a CSV::Cell.
- #
- # DESCRIPTION
- # Extract cells of next line.
- #
- def shift
- row = Row.new
- parsed_cells = get_row(row)
- row
- end
-
- # SYNOPSIS
- # CSV::Reader#close
- #
- # RETURNS
- # nil
- #
- # DESCRIPTION
- # Close this reader.
- #
- def close
- terminate
- end
-
- private
- def initialize(dev)
- raise RuntimeError.new('Do not instanciate this class directly.')
- end
-
- def get_row(row)
- raise NotImplementedError.new('Method get_row must be defined in a derived class.')
- end
-
- def terminate
- # Define if needed.
- end
- end
-
-
- # DESCRIPTION
- # CSV::StringReader -- CSV formatted stream reader.
- #
- # EXAMPLE
- # Read CSV lines untill the first column is 'stop'.
- #
- # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
+ # EXAMPLE 2
+ # CSV.parse('csvfile.csv') do |row|
# p row
- # break if !row[0].is_null && row[0].data == 'stop'
# end
#
- class StringReader < Reader
- public
-
- # SYNOPSIS
- # reader = CSV::StringReader.new(string)
- #
- # ARGS
- # string: a CSV String to be parsed.
- #
- # RETURNS
- # reader: Created instance.
- #
- # DESCRIPTION
- # Create instance. To get parse result, see CSV::Reader#each.
- #
- def initialize(string, col_sep = ?,, row_sep = nil)
- @col_sep = col_sep
- @row_sep = row_sep
- @dev = string
- @idx = 0
- if @dev[0, 3] == "\xef\xbb\xbf"
- @idx += 3
- end
- end
-
- private
- def get_row(row)
- parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
- if parsed_cells == 0 && next_idx == 0 && @idx != @dev.size
- raise IllegalFormatError.new
- end
- @idx = next_idx
- parsed_cells
- end
- end
-
-
- # DESCRIPTION
- # CSV::IOReader -- CSV formatted stream reader.
- #
- # EXAMPLE
- # Read CSV lines untill the first column is 'stop'.
+ # ARGS
+ # filename: filename to parse.
+ # col_sep: Column separator. ?, by default. If you want to separate
+ # fields with semicolon, give ?; here.
+ # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
+ # want to separate records with \r, give ?\r here.
#
- # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
- # p row
- # break if !row[0].is_null && row[0].data == 'stop'
- # end
+ # RETURNS
+ # reader instance. To get parse result, see CSV::Reader#each.
#
- class IOReader < Reader
- public
-
- # SYNOPSIS
- # reader = CSV::IOReader.new(io)
- #
- # ARGS
- # io: a CSV data to be parsed. Must be an IO. (io#read is called.)
- #
- # RETURNS
- # reader: Created instance.
- #
- # DESCRIPTION
- # Create instance. To get parse result, see CSV::Reader#each.
- #
- def initialize(io, col_sep = ?,, row_sep = nil)
- @io = io
- @io.binmode if @io.respond_to?(:binmode)
- @col_sep = col_sep
- @row_sep = row_sep
- @dev = CSV::IOBuf.new(@io)
- @idx = 0
- if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf
- @idx += 3
- end
- @close_on_terminate = false
- end
-
- # SYNOPSIS
- # CSV::IOReader#close_on_terminate
- #
- # RETURNS
- # true
- #
- # DESCRIPTION
- # Tell this reader to close the IO when terminated (Triggered by invoking
- # CSV::IOReader#close).
- #
- def close_on_terminate
- @close_on_terminate = true
- end
-
- private
- def get_row(row)
- parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
- if parsed_cells == 0 && next_idx == 0 && !@dev.is_eos?
- raise IllegalFormatError.new
- end
- dropped = @dev.drop(next_idx)
- @idx = next_idx - dropped
- parsed_cells
- end
-
- def terminate
- if @close_on_terminate
- @io.close
- end
-
- if @dev
- @dev.close
- end
- end
+ def CSV.parse(filename, col_sep = ?,, row_sep = nil, &block)
+ open_reader(filename, col_sep, row_sep, &block)
end
-
- # DESCRIPTION
- # CSV::Writer -- CSV formatted string/stream writer.
- #
- # EXAMPLE
- # Write rows to 'csvout' file.
- #
- # outfile = File.open('csvout', 'wb')
- # CSV::Writer.generate(outfile) do |csv|
- # csv << ['c1', nil, '', '"', "\r\n", 'c2']
- # # or
- # csv.add_row [
- # CSV::Cell.new('c1', false),
- # CSV::Cell.new('dummy', true),
- # CSV::Cell.new('', false),
- # CSV::Cell.new('"', false),
- # CSV::Cell.new("\r\n", false)
- # CSV::Cell.new('c2', false)
- # ]
- # ...
- # ...
- # end
+ # Open a CSV formatted file for writing.
#
- # outfile.close
+ # EXAMPLE 1
+ # writer = CSV.generate('csvfile.csv')
+ # writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
+ # writer.close
#
- class Writer
- public
-
- # SYNOPSIS
- # writer = CSV::Writer.create(str_or_readable)
- #
- # ARGS
- # str_or_writable: device for generated CSV string. Must respond to
- # '<<(string)'.
- #
- # RETURNS
- # writer: Created instance.
- #
- # DESCRIPTION
- # Create instance. To add CSV data to generate CSV string, see
- # CSV::Writer#<< or CSV::Writer#add_row.
- #
- def Writer.create(str_or_writable, col_sep = ?,, row_sep = nil)
- BasicWriter.new(str_or_writable, col_sep, row_sep)
- end
-
- # SYNOPSIS
- # CSV::Writer.generate(str_or_writable) do |writer|
- # ...
- # end
- #
- # ARGS
- # str_or_writable: device for generated CSV string. Must respond to
- # '<<(string)'.
- # writer: Created writer instance. See CSV::Writer#<< and
- # CSV::Writer#add_row to know how to generate CSV string.
- #
- # RETURNS
- # nil
- #
- # DESCRIPTION
- # Create writer instance. Caller block is called with the new instance.
- # To add CSV data to generate CSV string, see CSV::Writer#<< or
- # CSV::Writer#add_row.
- #
- def Writer.generate(str_or_writable, col_sep = ?,, row_sep = nil)
- writer = Writer.create(str_or_writable, col_sep, row_sep)
- yield(writer)
- writer.close
- nil
- end
-
- # SYNOPSIS
- # CSV::Writer#<<(row)
- #
- # ARGS
- # row: an Array of a String.
- #
- # RETURNS
- # self
- #
- # DESCRIPTION
- # Dump CSV stream to the device. Argument is an array of a String like
- # ['c1', 'c2', 'c3'].
- #
- def <<(ary)
- row = ary.collect { |item|
- if item.is_a?(Cell)
- item
- elsif (item.nil?)
- Cell.new('', true)
- else
- Cell.new(item.to_s, false)
- end
- }
- CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
- self
- end
-
- # SYNOPSIS
- # CSV::Writer#<<(row)
- #
- # ARGS
- # row: an Array of a CSV::Cell.
- #
- # RETURNS
- # self
- #
- # DESCRIPTION
- # Dump CSV stream to the device. Argument is an array of a CSV::Cell
- # like [CSV::Cell.new('c1', false), CSV::Cell.new('dummy', true)].
- # (Formar is 'c1' and latter is Null.)
- #
- def add_row(row)
- CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
- self
- end
-
- # SYNOPSIS
- # CSV::Writer#close
- #
- # RETURNS
- # nil
- #
- # DESCRIPTION
- # Close this writer.
- #
- def close
- terminate
- end
-
- private
- def initialize(dev)
- raise RuntimeError.new('Do not instanciate this class directly.')
- end
-
- def terminate
- # Define if needed.
- end
- end
-
-
- # DESCRIPTION
- # CSV::BasicWriter -- CSV formatted string/stream writer using <<.
- #
- class BasicWriter < Writer
- public
-
- # SYNOPSIS
- # writer = CSV::BasicWriter.new(str_or_writable)
- #
- # ARGS
- # str_or_writable: device for generated CSV string. Must respond to
- # '<<(string)'.
- #
- # RETURNS
- # writer: Created instance.
- #
- # DESCRIPTION
- # Create instance. To add CSV data to generate CSV string, see
- # CSV::Writer#<< or CSV::Writer#add_row.
- #
- def initialize(str_or_writable, col_sep = ?,, row_sep = nil)
- @col_sep = col_sep
- @row_sep = row_sep
- @dev = str_or_writable
- @dev.binmode if @dev.respond_to?(:binmode)
- @close_on_terminate = false
- end
-
- # SYNOPSIS
- # CSV::BasicWriter#close_on_terminate
- #
- # RETURNS
- # true
- #
- # DESCRIPTION
- # Tell this writer to close the IO when terminated (Triggered by invoking
- # CSV::BasicWriter#close).
- #
- def close_on_terminate
- @close_on_terminate = true
- end
-
- private
- def terminate
- if @close_on_terminate
- @dev.close
- end
- end
- end
-
- # SYNOPSIS
- # cells = CSV.parse_line(src, col_sep = ?,, row_sep = nil)
+ # EXAMPLE 2
+ # CSV.generate('csvfile.csv') do |writer|
+ # writer << ['r1c1', 'r1c2']
+ # writer << ['r2c1', 'r2c2']
+ # writer << [nil, nil]
+ # end
#
# ARGS
- # src: a CSV String.
+ # filename: filename to generate.
# col_sep: Column separator. ?, by default. If you want to separate
# fields with semicolon, give ?; here.
# row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
# want to separate records with \r, give ?\r here.
#
# RETURNS
- # cells: an Array of parsed cells in first line. Each cell is a String.
+ # writer instance. See CSV::Writer#<< and CSV::Writer#add_row to know how
+ # to generate CSV string.
#
- # DESCRIPTION
- # Parse one line from given string. Bare in mind it parses ONE LINE. Rest
- # of the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the
- # second line 'c,d' is ignored.
+ def CSV.generate(filename, col_sep = ?,, row_sep = nil, &block)
+ open_writer(filename, col_sep, row_sep, &block)
+ end
+
+ # Parse a line from given string. Bear in mind it parses ONE LINE. Rest of
+ # the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the
+ # second line 'c,d' is ignored.
#
- # If you don't know whether a target string to parse is exactly 1 line or
- # not, use CSV.parse_row instead of this method.
- #
+ # If you don't know whether a target string to parse is exactly 1 line or
+ # not, use CSV.parse_row instead of this method.
def CSV.parse_line(src, col_sep = ?,, row_sep = nil)
idx = 0
res_type = :DT_COLSEP
@@ -752,25 +181,8 @@ public
end
cells
end
-
- # SYNOPSIS
- # str = CSV.generate_line(cells, col_sep = ?,, row_sep = nil)
- #
- # ARGS
- # cells: an Array of cell to be converted to CSV string. Each cell must
- # respond to 'to_s'.
- # col_sep: Column separator. ?, by default. If you want to separate
- # fields with semicolon, give ?; here.
- # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
- # want to separate records with \r, give ?\r here.
- #
- # RETURNS
- # str: a String of generated CSV string.
- #
- # DESCRIPTION
- # Create a line from cells. Each cell is stringified by to_s.
- #
+ # Create a line from cells. Each cell is stringified by to_s.
def CSV.generate_line(cells, col_sep = ?,, row_sep = nil)
if (cells.size == 0)
return ''
@@ -793,9 +205,20 @@ public
end
result_str
end
-
- # SYNOPSIS
- # parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
+
+
+ # Parse a line from string. Consider using CSV.parse_line instead.
+ # To parse lines in CSV string, see EXAMPLE below.
+ #
+ # EXAMPLE
+ # src = "a,b\r\nc,d\r\ne,f"
+ # idx = 0
+ # begin
+ # parsed = []
+ # parsed_cells, idx = CSV.parse_row(src, idx, parsed)
+ # puts "Parsed #{ parsed_cells } cells."
+ # p parsed
+ # end while parsed_cells > 0
#
# ARGS
# src: a CSV data to be parsed. Must respond '[](idx)'.
@@ -813,20 +236,6 @@ public
# parsed_cells: num of parsed cells.
# idx: index of next parsing location of 'src'.
#
- # DESCRIPTION
- # Parse a line from string. To parse lines in CSV string, see EXAMPLE
- # below.
- #
- # EXAMPLE
- # src = "a,b\r\nc,d\r\ne,f"
- # idx = 0
- # begin
- # parsed = []
- # parsed_cells, idx = CSV.parse_row(src, idx, parsed)
- # puts "Parsed #{ parsed_cells } cells."
- # p parsed
- # end while parsed_cells > 0
- #
def CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
idx_backup = idx
parsed_cells = 0
@@ -849,26 +258,10 @@ public
end
return parsed_cells, idx
end
-
- # SYNOPSIS
- # parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
- #
- # ARGS
- # src: an Array of CSV::Cell to be converted to CSV string. Must respond to
- # 'size' and '[](idx)'. src[idx] must return CSV::Cell.
- # cells: num of cells in a line.
- # out_dev: buffer for generated CSV string. Must respond to '<<(string)'.
- # col_sep: Column separator. ?, by default. If you want to separate
- # fields with semicolon, give ?; here.
- # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
- # want to separate records with \r, give ?\r here.
- #
- # RETURNS
- # parsed_cells: num of converted cells.
- #
- # DESCRIPTION
- # Convert a line from cells data to string. To generate multi-row CSV
- # string, See EXAMPLE below.
+
+
+ # Convert a line from cells data to string. Consider using CSV.generate_line
+ # instead. To generate multi-row CSV string, see EXAMPLE below.
#
# EXAMPLE
# def d(str)
@@ -885,7 +278,20 @@ public
# puts "Created #{ parsed_cells } cells."
# end
# p buf
- #
+ #
+ # ARGS
+ # src: an Array of CSV::Cell to be converted to CSV string. Must respond to
+ # 'size' and '[](idx)'. src[idx] must return CSV::Cell.
+ # cells: num of cells in a line.
+ # out_dev: buffer for generated CSV string. Must respond to '<<(string)'.
+ # col_sep: Column separator. ?, by default. If you want to separate
+ # fields with semicolon, give ?; here.
+ # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
+ # want to separate records with \r, give ?\r here.
+ #
+ # RETURNS
+ # parsed_cells: num of converted cells.
+ #
def CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
src_size = src.size
if (src_size == 0)
@@ -910,14 +316,46 @@ public
end
parsed_cells
end
-
-private
- class IllegalFormatError < RuntimeError; end
- # Private class methods.
class << self
private
+ def open_reader(filename, col_sep, row_sep, &block)
+ file = File.open(filename, 'rb')
+ if block
+ begin
+ CSV::Reader.parse(file, col_sep, row_sep) do |row|
+ yield(row)
+ end
+ ensure
+ file.close
+ end
+ nil
+ else
+ reader = CSV::Reader.create(file, col_sep, row_sep)
+ reader.close_on_terminate
+ reader
+ end
+ end
+
+ def open_writer(filename, col_sep, row_sep, &block)
+ file = File.open(filename, 'wb')
+ if block
+ begin
+ CSV::Writer.generate(file, col_sep, row_sep) do |writer|
+ yield(writer)
+ end
+ ensure
+ file.close
+ end
+ nil
+ else
+ writer = CSV::Writer.create(file, col_sep, row_sep)
+ writer.close_on_terminate
+ writer
+ end
+ end
+
def parse_body(src, idx, cell, col_sep, row_sep)
row_sep_end = row_sep || ?\n
cell.is_null = false
@@ -1031,7 +469,7 @@ private
end
return :DT_EOS, idx
end
-
+
def generate_body(cells, out_dev, col_sep, row_sep)
row_data = cells.data.dup
if (!cells.is_null)
@@ -1046,7 +484,7 @@ private
end
end
end
-
+
def generate_separator(type, out_dev, col_sep, row_sep)
case type
when :DT_COLSEP
@@ -1058,8 +496,255 @@ private
end
- # DESCRIPTION
- # CSV::StreamBuf -- a class for a bufferd stream.
+ # CSV formatted string/stream reader.
+ #
+ # EXAMPLE
+ # Read CSV lines until the first column is 'stop'.
+ #
+ # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
+ # p row
+ # break if !row[0].is_null && row[0].data == 'stop'
+ # end
+ #
+ class Reader
+ include Enumerable
+
+ # Parse CSV data and get lines. Given block is called for each parsed row.
+ # Block value is always nil. Rows are not cached for performance reasons.
+ def Reader.parse(str_or_readable, col_sep = ?,, row_sep = nil)
+ reader = create(str_or_readable, col_sep, row_sep)
+ reader.each do |row|
+ yield(row)
+ end
+ reader.close
+ nil
+ end
+
+ # Returns reader instance.
+ def Reader.create(str_or_readable, col_sep = ?,, row_sep = nil)
+ case str_or_readable
+ when IO
+ IOReader.new(str_or_readable, col_sep, row_sep)
+ when String
+ StringReader.new(str_or_readable, col_sep, row_sep)
+ else
+ IOReader.new(str_or_readable, col_sep, row_sep)
+ end
+ end
+
+ def each
+ while true
+ row = Row.new
+ parsed_cells = get_row(row)
+ if parsed_cells == 0
+ break
+ end
+ yield(row)
+ end
+ nil
+ end
+
+ def shift
+ row = Row.new
+ parsed_cells = get_row(row)
+ row
+ end
+
+ def close
+ terminate
+ end
+
+ private
+
+ def initialize(dev)
+ raise RuntimeError.new('do not instanciate this class directly')
+ end
+
+ def get_row(row)
+ raise NotImplementedError.new(
+ 'method get_row must be defined in a derived class')
+ end
+
+ def terminate
+ # Define if needed.
+ end
+ end
+
+
+ class StringReader < Reader
+
+ def initialize(string, col_sep = ?,, row_sep = nil)
+ @col_sep = col_sep
+ @row_sep = row_sep
+ @dev = string
+ @idx = 0
+ if @dev[0, 3] == "\xef\xbb\xbf"
+ @idx += 3
+ end
+ end
+
+ private
+
+ def get_row(row)
+ parsed_cells, next_idx =
+ CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
+ if parsed_cells == 0 && next_idx == 0 && @idx != @dev.size
+ raise IllegalFormatError.new
+ end
+ @idx = next_idx
+ parsed_cells
+ end
+ end
+
+
+ class IOReader < Reader
+
+ def initialize(io, col_sep = ?,, row_sep = nil)
+ @io = io
+ @io.binmode if @io.respond_to?(:binmode)
+ @col_sep = col_sep
+ @row_sep = row_sep
+ @dev = CSV::IOBuf.new(@io)
+ @idx = 0
+ if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf
+ @idx += 3
+ end
+ @close_on_terminate = false
+ end
+
+ # Tell this reader to close the IO when terminated (Triggered by invoking
+ # CSV::IOReader#close).
+ def close_on_terminate
+ @close_on_terminate = true
+ end
+
+ private
+
+ def get_row(row)
+ parsed_cells, next_idx =
+ CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
+ if parsed_cells == 0 && next_idx == 0 && !@dev.is_eos?
+ raise IllegalFormatError.new
+ end
+ dropped = @dev.drop(next_idx)
+ @idx = next_idx - dropped
+ parsed_cells
+ end
+
+ def terminate
+ if @close_on_terminate
+ @io.close
+ end
+
+ if @dev
+ @dev.close
+ end
+ end
+ end
+
+
+ # CSV formatted string/stream writer.
+ #
+ # EXAMPLE
+ # Write rows to 'csvout' file.
+ #
+ # outfile = File.open('csvout', 'wb')
+ # CSV::Writer.generate(outfile) do |csv|
+ # csv << ['c1', nil, '', '"', "\r\n", 'c2']
+ # # or
+ # csv.add_row [
+ # CSV::Cell.new('c1', false),
+ # CSV::Cell.new('dummy', true),
+ # CSV::Cell.new('', false),
+ # CSV::Cell.new('"', false),
+ # CSV::Cell.new("\r\n", false)
+ # CSV::Cell.new('c2', false)
+ # ]
+ # ...
+ # ...
+ # end
+ #
+ # outfile.close
+ #
+ class Writer
+
+ # Generate CSV. Given block is called with the writer instance.
+ def Writer.generate(str_or_writable, col_sep = ?,, row_sep = nil)
+ writer = Writer.create(str_or_writable, col_sep, row_sep)
+ yield(writer)
+ writer.close
+ nil
+ end
+
+ # str_or_writable must handle '<<(string)'.
+ def Writer.create(str_or_writable, col_sep = ?,, row_sep = nil)
+ BasicWriter.new(str_or_writable, col_sep, row_sep)
+ end
+
+ # Dump CSV stream to the device. ary: an Array of String, nil or CSV::Cell.
+ def <<(ary)
+ row = ary.collect { |item|
+ if item.is_a?(Cell)
+ item
+ elsif (item.nil?)
+ Cell.new('', true)
+ else
+ Cell.new(item.to_s, false)
+ end
+ }
+ CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
+ self
+ end
+
+ # Dump CSV stream to the device. Argument must be an Array of CSV::Cell.
+ def add_row(row)
+ CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
+ self
+ end
+
+ def close
+ terminate
+ end
+
+ private
+
+ def initialize(dev)
+ raise RuntimeError.new('do not instanciate this class directly')
+ end
+
+ def terminate
+ # Define if needed.
+ end
+ end
+
+
+ class BasicWriter < Writer
+
+ def initialize(str_or_writable, col_sep = ?,, row_sep = nil)
+ @col_sep = col_sep
+ @row_sep = row_sep
+ @dev = str_or_writable
+ @dev.binmode if @dev.respond_to?(:binmode)
+ @close_on_terminate = false
+ end
+
+ # Tell this writer to close the IO when terminated (Triggered by invoking
+ # CSV::BasicWriter#close).
+ def close_on_terminate
+ @close_on_terminate = true
+ end
+
+ private
+
+ def terminate
+ if @close_on_terminate
+ @dev.close
+ end
+ end
+ end
+
+
+ # Buffered stream.
#
# EXAMPLE 1 -- an IO.
# class MyBuf < StreamBuf
@@ -1101,14 +786,14 @@ private
#
# EXAMPLE 2 -- String.
# This is a conceptual example. No pros with this.
- #
+ #
# class StrBuf < StreamBuf
# def initialize(s)
# @str = s
# @idx = 0
# super()
# end
- #
+ #
# def read(size)
# str = @str[@idx, size]
# @idx += str.size
@@ -1117,26 +802,15 @@ private
# end
#
class StreamBuf # pure virtual. (do not instanciate it directly)
- public
-
- # SYNOPSIS
- # char/str = CSV::StreamBuf#get(idx, n = nil)
- # char/str = CSV::StreamBuf#[idx, n = nil]
- #
- # ARGS
- # idx: index of a string to specify a start point of a string to get.
- # Unlike String instance, idx < 0 returns nil.
- # n: size of a string to get.
- #
- # RETURNS
- # char: if n == nil. A char at idx.
- # str: if n != nil. A partial string, from idx to (idx + size). At
- # EOF, the string size could not equal to arg n.
- #
- # DESCRIPTION
- # Get a char or a partial string from the stream.
- #
- def [](idx, n = nil)
+
+ # Get a char or a partial string from the stream.
+ # idx: index of the start point of the string to get.
+ # Unlike a String instance, idx < 0 returns nil.
+ # n: size of the string to get.
+ # Returns the char at idx if n == nil.
+ # Returns a partial string, from idx to (idx + n), if n != nil. At EOF,
+ # the string size might not be equal to arg n.
+ def [](idx, n = nil)
if idx < 0
return nil
end
@@ -1182,21 +856,11 @@ private
end
end
alias get []
-
- # SYNOPSIS
- # size_dropped = CSV::StreamBuf#drop(n)
- #
- # ARGS
- # n: drop size
- #
- # RETURNS
- # size_dropped: droped size. At EOF, dropped size might not equals to arg n.
- # 0 if n <= 0.
- #
- # DESCRIPTION
- # Drop a string from the stream. Once you drop the head of the stream,
- # access to the dropped part via [] or get returns nil.
- #
+
+ # Drop a string from the stream.
+ # Returns the dropped size. At EOF, the dropped size might not equal arg n.
+ # Once you drop the head of the stream, access to the dropped part via []
+ # or get returns nil.
def drop(n)
if is_eos?
return 0
@@ -1224,27 +888,13 @@ private
end
size_dropped
end
-
- # SYNOPSIS
- # is_eos = CSV::StreamBuf#is_eos?
- #
- # RETURNS
- # is_eos: true if end of the stream or false.
- #
- # DESCRIPTION
- # Check EOF or not.
- #
+
def is_eos?
return idx_is_eos?(0)
end
-
- # SYNOPSIS
- # N/A
- #
- # DESCRIPTION
- # Do not instanciate this class directly. Define your own class which
- # derives this class and define 'read' instance method.
- #
+
+ # WARN: Do not instantiate this class directly. Define your own class
+ # which derives from this class and defines the 'read' instance method.
def initialize
@buf_list = []
@cur_buf = @buf_tail_idx = -1
@@ -1253,22 +903,24 @@ private
add_buf
@cur_buf = @buf_tail_idx
end
-
+
protected
+
def terminate
while (rel_buf); end
end
-
+
# protected method 'read' must be defined in derived classes.
# CAUTION: Returning a string which size is not equal to 'size' means
- # EnfOfStream. When it is not at EOS, you must block the callee, try to
- # read and return the sized string.
+ # EndOfStream. When it is not at EOS, you must block the caller, try to
+ # read and return the sized string.
def read(size) # raise EOFError
- raise NotImplementedError.new('Method read must be defined in a derived class.')
+ raise NotImplementedError.new(
+ 'method read must be defined in a derived class')
end
-
+
private
-
+
def buf_size(idx)
@buf_list[idx].size
end
@@ -1296,7 +948,7 @@ private
true
end
end
-
+
def rel_buf
if (@cur_buf < 0)
return false
@@ -1310,16 +962,16 @@ private
return true
end
end
-
+
def idx_is_eos?(idx)
(@is_eos && ((@cur_buf < 0) || (@cur_buf == @buf_tail_idx)))
end
-
+
BufSize = 1024 * 8
end
- # DESCRIPTION
- # CSV::IOBuf -- a class for a bufferd IO.
+
+ # Buffered IO.
#
# EXAMPLE
# # File 'bigdata' could be a giga-byte size one!
@@ -1330,21 +982,21 @@ private
# end
#
class IOBuf < StreamBuf
- public
def initialize(s)
@s = s
super()
end
-
+
def close
terminate
end
private
+
def read(size)
@s.read(size)
end
-
+
def terminate
super()
end