summaryrefslogtreecommitdiff
path: root/lib/csv.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/csv.rb')
-rw-r--r--lib/csv.rb1352
1 files changed, 0 insertions, 1352 deletions
diff --git a/lib/csv.rb b/lib/csv.rb
deleted file mode 100644
index cfc5489ea0..0000000000
--- a/lib/csv.rb
+++ /dev/null
@@ -1,1352 +0,0 @@
-# CSV -- module for generating/parsing CSV data.
-
-# $Id$
-
-# This module is copyrighted free software by NAKAMURA, Hiroshi.
-# You can redistribute it and/or modify it under the same term as Ruby.
-
-
-class CSV
-public
-
- # DESCRIPTION
- # CSV::Cell -- Describes 1 cell of CSV.
- #
- class Cell
- public
-
- # Datum as string.
- attr_accessor :data
-
- # Is this datum null?
- attr_accessor :is_null
-
- # SYNOPSIS
- # cell = CSV::Cell.new(data = '', is_null = true)
- #
- # ARGS
- # data: datum as String
- # is_null: is this datum null?
- #
- # RETURNS
- # cell: Created instance.
- #
- # DESCRIPTION
- # Create instance. If is_null is true, datum is stored in the instance
- # created but it should be treated as 'NULL'.
- #
- def initialize(data = '', is_null = true)
- @data = data
- @is_null = is_null
- end
-
- # SYNOPSIS
- # CSV::Cell#match(rhs)
- #
- # ARGS
- # rhs: an instance of CSV::Cell to be compared.
- #
- # RETURNS
- # true/false. See the souce if you want to know matching algorithm.
- #
- # DESCRIPTION
- # Compare another cell with me. Bare in mind Null matches with Null
- # using this method. Use CSV::Cell#== if you want Null never matches
- # with other data including Null.
- #
- def match(rhs)
- if @is_null and rhs.is_null
- true
- elsif @is_null or rhs.is_null
- false
- else
- @data == rhs.data
- end
- end
-
- # SYNOPSIS
- # CSV::Cell#==(rhs)
- #
- # ARGS
- # rhs: an instance of CSV::Cell to be compared.
- #
- # RETURNS
- # true/false. See the souce if you want to know matching algorithm.
- #
- # DESCRIPTION
- # Compare another cell with me. Bare in mind Null is not match with
- # Null using this method. Null never matches with other data including
- # Null. Use CSV::Cell#match if you want Null matches with Null.
- #
- def ==(rhs)
- if @is_null or rhs.is_null
- false
- else
- @data == rhs.data
- end
- end
-
- def to_str
- content.to_str
- end
-
- def to_s
- content.to_s
- end
-
- private
-
- def content
- @is_null ? nil : data
- end
- end
-
-
- # DESCRIPTION
- # CSV::Row -- Describes a row of CSV. Each element must be a CSV::Cell.
- #
- class Row < Array
- public
-
- # SYNOPSIS
- # CSV::Row#to_a
- #
- # RETURNS
- # An Array of String.
- #
- # DESCRIPTION
- # Convert CSV::Cell to String. Null is converted to nil.
- #
- def to_a
- self.collect { |cell| cell.is_null ? nil : cell.data }
- end
-
- # SYNOPSIS
- # CSV::Row#match(rhs)
- #
- # ARGS
- # rhs: an Array of cells. Each cell is a instance of CSV::Cell.
- #
- # RETURNS
- # true/false. See the souce if you want to know matching algorithm.
- #
- # DESCRIPTION
- # Compare another row with me.
- #
- def match(rhs)
- if self.size != rhs.size
- return false
- end
- for idx in 0...(self.size)
- unless self[idx].match(rhs[idx])
- return false
- end
- end
- true
- end
- end
-
-
- # SYNOPSIS
- # 1. reader = CSV.open(filename, 'r')
- #
- # 2. CSV.open(filename, 'r') do |row|
- # ...
- # end
- #
- # 3. writer = CSV.open(filename, 'w')
- #
- # 4. CSV.open(filename, 'w') do |writer|
- # ...
- # end
- #
- # ARGS
- # filename: filename to open.
- # mode: 'r' for read (parse)
- # 'w' for write (generate)
- # row: an Array of cells which is a parsed line.
- # writer: Created writer instance. See CSV::Writer#<< and
- # CSV::Writer#add_row to know how to generate CSV string.
- #
- # RETURNS
- # reader: Create reader instance. To get parse result, see
- # CSV::Reader#each.
- # writer: Created writer instance. See CSV::Writer#<< and
- # CSV::Writer#add_row to know how to generate CSV string.
- #
- # DESCRIPTION
- # Open a CSV formatted file to read or write.
- #
- # EXAMPLE 1
- # reader = CSV.open('csvfile.csv', 'r')
- # row1 = reader.shift
- # row2 = reader.shift
- # if row2.empty?
- # p 'row2 not find.'
- # end
- # reader.close
- #
- # EXAMPLE 2
- # CSV.open('csvfile.csv', 'r') do |row|
- # p row
- # end
- #
- # EXAMPLE 3
- # writer = CSV.open('csvfile.csv', 'w')
- # writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
- # writer.close
- #
- # EXAMPLE 4
- # CSV.open('csvfile.csv', 'w') do |writer|
- # writer << ['r1c1', 'r1c2']
- # writer << ['r2c1', 'r2c2']
- # writer << [nil, nil]
- # end
- #
- def CSV.open(filename, mode, col_sep = ?,, row_sep = nil, &block)
- if mode == 'r' or mode == 'rb'
- open_reader(filename, col_sep, row_sep, &block)
- elsif mode == 'w' or mode == 'wb'
- open_writer(filename, col_sep, row_sep, &block)
- else
- raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
- end
- end
-
- def CSV.parse(filename, col_sep = ?,, row_sep = nil, &block)
- open_reader(filename, col_sep, row_sep, &block)
- end
-
- def CSV.generate(filename, col_sep = ?,, row_sep = nil, &block)
- open_writer(filename, col_sep, row_sep, &block)
- end
-
- # Private class methods.
- class << self
- private
- def open_reader(filename, col_sep, row_sep, &block)
- file = File.open(filename, 'rb')
- if block
- begin
- CSV::Reader.parse(file, col_sep, row_sep) do |row|
- yield(row)
- end
- ensure
- file.close
- end
- nil
- else
- reader = CSV::Reader.create(file, col_sep, row_sep)
- reader.close_on_terminate
- reader
- end
- end
-
- def open_writer(filename, col_sep, row_sep, &block)
- file = File.open(filename, 'wb')
- if block
- begin
- CSV::Writer.generate(file, col_sep, row_sep) do |writer|
- yield(writer)
- end
- ensure
- file.close
- end
- nil
- else
- writer = CSV::Writer.create(file, col_sep, row_sep)
- writer.close_on_terminate
- writer
- end
- end
- end
-
-
- # DESCRIPTION
- # CSV::Reader -- CSV formatted string/stream reader.
- #
- # EXAMPLE
- # Read CSV lines untill the first column is 'stop'.
- #
- # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
- # p row
- # break if !row[0].is_null && row[0].data == 'stop'
- # end
- #
- class Reader
- include Enumerable
- public
-
- # SYNOPSIS
- # reader = CSV::Reader.create(str_or_readable)
- #
- # ARGS
- # str_or_readable: a CSV data to be parsed. A String or an IO.
- #
- # RETURNS
- # reader: Created instance.
- #
- # DESCRIPTION
- # Create instance. To get parse result, see CSV::Reader#each.
- #
- def Reader.create(str_or_readable, col_sep = ?,, row_sep = nil)
- case str_or_readable
- when IO
- IOReader.new(str_or_readable, col_sep, row_sep)
- when String
- StringReader.new(str_or_readable, col_sep, row_sep)
- else
- IOReader.new(str_or_readable, col_sep, row_sep)
- end
- end
-
- # SYNOPSIS
- # CSV::Reader.parse(str_or_readable) do |row|
- # ...
- # end
- #
- # ARGS
- # str_or_readable: a CSV data to be parsed. A String or an IO.
- # row: a CSV::Row; an Array of a CSV::Cell in a line.
- #
- # RETURNS
- # nil
- #
- # DESCRIPTION
- # Parse CSV data and get lines. Caller block is called for each line
- # with an argument which is a chunk of cells in a row.
- #
- # Block value is always nil. Rows are not cached for performance
- # reason.
- #
- def Reader.parse(str_or_readable, col_sep = ?,, row_sep = nil)
- reader = create(str_or_readable, col_sep, row_sep)
- reader.each do |row|
- yield(row)
- end
- reader.close
- nil
- end
-
- # SYNOPSIS
- # CSV::Reader#each do |row|
- # ...
- # end
- #
- # ARGS
- # row: a CSV::Row; an Array of a CSV::Cell in a line.
- #
- # RETURNS
- # nil
- #
- # DESCRIPTION
- # Caller block is called for each line with an argument which is a chunk
- # of cells in a row.
- #
- # Block value is always nil. Rows are not cached for performance
- # reason.
- #
- def each
- while true
- row = Row.new
- parsed_cells = get_row(row)
- if parsed_cells == 0
- break
- end
- yield(row)
- end
- nil
- end
-
- # SYNOPSIS
- # cell = CSV::Reader#shift
- #
- # RETURNS
- # cell: a CSV::Row; an Array of a CSV::Cell.
- #
- # DESCRIPTION
- # Extract cells of next line.
- #
- def shift
- row = Row.new
- parsed_cells = get_row(row)
- row
- end
-
- # SYNOPSIS
- # CSV::Reader#close
- #
- # RETURNS
- # nil
- #
- # DESCRIPTION
- # Close this reader.
- #
- def close
- terminate
- end
-
- private
- def initialize(dev)
- raise RuntimeError.new('Do not instanciate this class directly.')
- end
-
- def get_row(row)
- raise NotImplementedError.new('Method get_row must be defined in a derived class.')
- end
-
- def terminate
- # Define if needed.
- end
- end
-
-
- # DESCRIPTION
- # CSV::StringReader -- CSV formatted stream reader.
- #
- # EXAMPLE
- # Read CSV lines untill the first column is 'stop'.
- #
- # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
- # p row
- # break if !row[0].is_null && row[0].data == 'stop'
- # end
- #
- class StringReader < Reader
- public
-
- # SYNOPSIS
- # reader = CSV::StringReader.new(string)
- #
- # ARGS
- # string: a CSV String to be parsed.
- #
- # RETURNS
- # reader: Created instance.
- #
- # DESCRIPTION
- # Create instance. To get parse result, see CSV::Reader#each.
- #
- def initialize(string, col_sep = ?,, row_sep = nil)
- @col_sep = col_sep
- @row_sep = row_sep
- @dev = string
- @idx = 0
- if @dev[0, 3] == "\xef\xbb\xbf"
- @idx += 3
- end
- end
-
- private
- def get_row(row)
- parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
- if parsed_cells == 0 && next_idx == 0 && @idx != @dev.size
- raise IllegalFormatError.new
- end
- @idx = next_idx
- parsed_cells
- end
- end
-
-
- # DESCRIPTION
- # CSV::IOReader -- CSV formatted stream reader.
- #
- # EXAMPLE
- # Read CSV lines untill the first column is 'stop'.
- #
- # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
- # p row
- # break if !row[0].is_null && row[0].data == 'stop'
- # end
- #
- class IOReader < Reader
- public
-
- # SYNOPSIS
- # reader = CSV::IOReader.new(io)
- #
- # ARGS
- # io: a CSV data to be parsed. Must be an IO. (io#read is called.)
- #
- # RETURNS
- # reader: Created instance.
- #
- # DESCRIPTION
- # Create instance. To get parse result, see CSV::Reader#each.
- #
- def initialize(io, col_sep = ?,, row_sep = nil)
- @io = io
- @io.binmode if @io.respond_to?(:binmode)
- @col_sep = col_sep
- @row_sep = row_sep
- @dev = CSV::IOBuf.new(@io)
- @idx = 0
- if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf
- @idx += 3
- end
- @close_on_terminate = false
- end
-
- # SYNOPSIS
- # CSV::IOReader#close_on_terminate
- #
- # RETURNS
- # true
- #
- # DESCRIPTION
- # Tell this reader to close the IO when terminated (Triggered by invoking
- # CSV::IOReader#close).
- #
- def close_on_terminate
- @close_on_terminate = true
- end
-
- private
- def get_row(row)
- parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
- if parsed_cells == 0 && next_idx == 0 && !@dev.is_eos?
- raise IllegalFormatError.new
- end
- dropped = @dev.drop(next_idx)
- @idx = next_idx - dropped
- parsed_cells
- end
-
- def terminate
- if @close_on_terminate
- @io.close
- end
-
- if @dev
- @dev.close
- end
- end
- end
-
-
- # DESCRIPTION
- # CSV::Writer -- CSV formatted string/stream writer.
- #
- # EXAMPLE
- # Write rows to 'csvout' file.
- #
- # outfile = File.open('csvout', 'wb')
- # CSV::Writer.generate(outfile) do |csv|
- # csv << ['c1', nil, '', '"', "\r\n", 'c2']
- # # or
- # csv.add_row [
- # CSV::Cell.new('c1', false),
- # CSV::Cell.new('dummy', true),
- # CSV::Cell.new('', false),
- # CSV::Cell.new('"', false),
- # CSV::Cell.new("\r\n", false)
- # CSV::Cell.new('c2', false)
- # ]
- # ...
- # ...
- # end
- #
- # outfile.close
- #
- class Writer
- public
-
- # SYNOPSIS
- # writer = CSV::Writer.create(str_or_readable)
- #
- # ARGS
- # str_or_writable: device for generated CSV string. Must respond to
- # '<<(string)'.
- #
- # RETURNS
- # writer: Created instance.
- #
- # DESCRIPTION
- # Create instance. To add CSV data to generate CSV string, see
- # CSV::Writer#<< or CSV::Writer#add_row.
- #
- def Writer.create(str_or_writable, col_sep = ?,, row_sep = nil)
- BasicWriter.new(str_or_writable, col_sep, row_sep)
- end
-
- # SYNOPSIS
- # CSV::Writer.generate(str_or_writable) do |writer|
- # ...
- # end
- #
- # ARGS
- # str_or_writable: device for generated CSV string. Must respond to
- # '<<(string)'.
- # writer: Created writer instance. See CSV::Writer#<< and
- # CSV::Writer#add_row to know how to generate CSV string.
- #
- # RETURNS
- # nil
- #
- # DESCRIPTION
- # Create writer instance. Caller block is called with the new instance.
- # To add CSV data to generate CSV string, see CSV::Writer#<< or
- # CSV::Writer#add_row.
- #
- def Writer.generate(str_or_writable, col_sep = ?,, row_sep = nil)
- writer = Writer.create(str_or_writable, col_sep, row_sep)
- yield(writer)
- writer.close
- nil
- end
-
- # SYNOPSIS
- # CSV::Writer#<<(row)
- #
- # ARGS
- # row: an Array of a String.
- #
- # RETURNS
- # self
- #
- # DESCRIPTION
- # Dump CSV stream to the device. Argument is an array of a String like
- # ['c1', 'c2', 'c3'].
- #
- def <<(ary)
- row = ary.collect { |item|
- if item.is_a?(Cell)
- item
- elsif (item.nil?)
- Cell.new('', true)
- else
- Cell.new(item.to_s, false)
- end
- }
- CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
- self
- end
-
- # SYNOPSIS
- # CSV::Writer#<<(row)
- #
- # ARGS
- # row: an Array of a CSV::Cell.
- #
- # RETURNS
- # self
- #
- # DESCRIPTION
- # Dump CSV stream to the device. Argument is an array of a CSV::Cell
- # like [CSV::Cell.new('c1', false), CSV::Cell.new('dummy', true)].
- # (Formar is 'c1' and latter is Null.)
- #
- def add_row(row)
- CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
- self
- end
-
- # SYNOPSIS
- # CSV::Writer#close
- #
- # RETURNS
- # nil
- #
- # DESCRIPTION
- # Close this writer.
- #
- def close
- terminate
- end
-
- private
- def initialize(dev)
- raise RuntimeError.new('Do not instanciate this class directly.')
- end
-
- def terminate
- # Define if needed.
- end
- end
-
-
- # DESCRIPTION
- # CSV::BasicWriter -- CSV formatted string/stream writer using <<.
- #
- class BasicWriter < Writer
- public
-
- # SYNOPSIS
- # writer = CSV::BasicWriter.new(str_or_writable)
- #
- # ARGS
- # str_or_writable: device for generated CSV string. Must respond to
- # '<<(string)'.
- #
- # RETURNS
- # writer: Created instance.
- #
- # DESCRIPTION
- # Create instance. To add CSV data to generate CSV string, see
- # CSV::Writer#<< or CSV::Writer#add_row.
- #
- def initialize(str_or_writable, col_sep = ?,, row_sep = nil)
- @col_sep = col_sep
- @row_sep = row_sep
- @dev = str_or_writable
- @dev.binmode if @dev.respond_to?(:binmode)
- @close_on_terminate = false
- end
-
- # SYNOPSIS
- # CSV::BasicWriter#close_on_terminate
- #
- # RETURNS
- # true
- #
- # DESCRIPTION
- # Tell this writer to close the IO when terminated (Triggered by invoking
- # CSV::BasicWriter#close).
- #
- def close_on_terminate
- @close_on_terminate = true
- end
-
- private
- def terminate
- if @close_on_terminate
- @dev.close
- end
- end
- end
-
- # SYNOPSIS
- # cells = CSV.parse_line(src, col_sep = ?,, row_sep = nil)
- #
- # ARGS
- # src: a CSV String.
- # col_sep: Column separator. ?, by default. If you want to separate
- # fields with semicolon, give ?; here.
- # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
- # want to separate records with \r, give ?\r here.
- #
- # RETURNS
- # cells: an Array of parsed cells in first line. Each cell is a String.
- #
- # DESCRIPTION
- # Parse one line from given string. Bare in mind it parses ONE LINE. Rest
- # of the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the
- # second line 'c,d' is ignored.
- #
- # If you don't know whether a target string to parse is exactly 1 line or
- # not, use CSV.parse_row instead of this method.
- #
- def CSV.parse_line(src, col_sep = ?,, row_sep = nil)
- idx = 0
- res_type = :DT_COLSEP
- cells = Row.new
- begin
- while (res_type.equal?(:DT_COLSEP))
- cell = Cell.new
- res_type, idx = parse_body(src, idx, cell, col_sep, row_sep)
- cells.push(cell.is_null ? nil : cell.data)
- end
- rescue IllegalFormatError
- return Row.new
- end
- cells
- end
-
-
- # SYNOPSIS
- # str = CSV.generate_line(cells, col_sep = ?,, row_sep = nil)
- #
- # ARGS
- # cells: an Array of cell to be converted to CSV string. Each cell must
- # respond to 'to_s'.
- # col_sep: Column separator. ?, by default. If you want to separate
- # fields with semicolon, give ?; here.
- # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
- # want to separate records with \r, give ?\r here.
- #
- # RETURNS
- # str: a String of generated CSV string.
- #
- # DESCRIPTION
- # Create a line from cells. Each cell is stringified by to_s.
- #
- def CSV.generate_line(cells, col_sep = ?,, row_sep = nil)
- if (cells.size == 0)
- return ''
- end
- res_type = :DT_COLSEP
- result_str = ''
- idx = 0
- while true
- cell = if (cells[idx].nil?)
- Cell.new('', true)
- else
- Cell.new(cells[idx].to_s, false)
- end
- generate_body(cell, result_str, col_sep, row_sep)
- idx += 1
- if (idx == cells.size)
- break
- end
- generate_separator(:DT_COLSEP, result_str, col_sep, row_sep)
- end
- result_str
- end
-
- # SYNOPSIS
- # parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
- #
- # ARGS
- # src: a CSV data to be parsed. Must respond '[](idx)'.
- # src[](idx) must return a char. (Not a string such as 'a', but 97).
- # src[](idx_out_of_bounds) must return nil. A String satisfies this
- # requirement.
- # idx: index of parsing location of 'src'. 0 origin.
- # out_dev: buffer for parsed cells. Must respond '<<(CSV::Cell)'.
- # col_sep: Column separator. ?, by default. If you want to separate
- # fields with semicolon, give ?; here.
- # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
- # want to separate records with \r, give ?\r here.
- #
- # RETURNS
- # parsed_cells: num of parsed cells.
- # idx: index of next parsing location of 'src'.
- #
- # DESCRIPTION
- # Parse a line from string. To parse lines in CSV string, see EXAMPLE
- # below.
- #
- # EXAMPLE
- # src = "a,b\r\nc,d\r\ne,f"
- # idx = 0
- # begin
- # parsed = []
- # parsed_cells, idx = CSV.parse_row(src, idx, parsed)
- # puts "Parsed #{ parsed_cells } cells."
- # p parsed
- # end while parsed_cells > 0
- #
- def CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
- idx_backup = idx
- parsed_cells = 0
- res_type = :DT_COLSEP
- begin
- while (!res_type.equal?(:DT_ROWSEP))
- cell = Cell.new
- res_type, idx = parse_body(src, idx, cell, col_sep, row_sep)
- if res_type.equal?(:DT_EOS)
- if idx == idx_backup #((parsed_cells == 0) && (cell.is_null))
- return 0, 0
- end
- res_type = :DT_ROWSEP
- end
- parsed_cells += 1
- out_dev << cell
- end
- rescue IllegalFormatError
- return 0, 0
- end
- return parsed_cells, idx
- end
-
- # SYNOPSIS
- # parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
- #
- # ARGS
- # src: an Array of CSV::Cell to be converted to CSV string. Must respond to
- # 'size' and '[](idx)'. src[idx] must return CSV::Cell.
- # cells: num of cells in a line.
- # out_dev: buffer for generated CSV string. Must respond to '<<(string)'.
- # col_sep: Column separator. ?, by default. If you want to separate
- # fields with semicolon, give ?; here.
- # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
- # want to separate records with \r, give ?\r here.
- #
- # RETURNS
- # parsed_cells: num of converted cells.
- #
- # DESCRIPTION
- # Convert a line from cells data to string. To generate multi-row CSV
- # string, See EXAMPLE below.
- #
- # EXAMPLE
- # def d(str)
- # CSV::Cell.new(str, false)
- # end
- #
- # row1 = [d('a'), d('b')]
- # row2 = [d('c'), d('d')]
- # row3 = [d('e'), d('f')]
- # src = [row1, row2, row3]
- # buf = ''
- # src.each do |row|
- # parsed_cells = CSV.generate_row(row, 2, buf)
- # puts "Created #{ parsed_cells } cells."
- # end
- # p buf
- #
- def CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
- src_size = src.size
- if (src_size == 0)
- if cells == 0
- generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep)
- end
- return 0
- end
- res_type = :DT_COLSEP
- parsed_cells = 0
- generate_body(src[parsed_cells], out_dev, col_sep, row_sep)
- parsed_cells += 1
- while ((parsed_cells < cells) && (parsed_cells != src_size))
- generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep)
- generate_body(src[parsed_cells], out_dev, col_sep, row_sep)
- parsed_cells += 1
- end
- if (parsed_cells == cells)
- generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep)
- else
- generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep)
- end
- parsed_cells
- end
-
-private
- class IllegalFormatError < RuntimeError; end
-
- # Private class methods.
- class << self
- private
-
- def parse_body(src, idx, cell, col_sep, row_sep)
- row_sep_end = row_sep || ?\n
- cell.is_null = false
- state = :ST_START
- quoted = false
- cr = false
- c = nil
- while (c = src[idx])
- idx += 1
- result_state = :DT_UNKNOWN
- if (c == col_sep)
- if state.equal?(:ST_DATA)
- if cr
- raise IllegalFormatError.new
- end
- if (!quoted)
- state = :ST_END
- result_state = :DT_COLSEP
- else
- cell.data << c.chr
- end
- elsif state.equal?(:ST_QUOTE)
- if cr
- raise IllegalFormatError.new
- end
- state = :ST_END
- result_state = :DT_COLSEP
- else # :ST_START
- cell.is_null = true
- state = :ST_END
- result_state = :DT_COLSEP
- end
- elsif (c == ?") # " for vim syntax hilighting.
- if state.equal?(:ST_DATA)
- if cr
- raise IllegalFormatError.new
- end
- if quoted
- quoted = false
- state = :ST_QUOTE
- else
- raise IllegalFormatError.new
- end
- elsif state.equal?(:ST_QUOTE)
- cell.data << c.chr
- quoted = true
- state = :ST_DATA
- else # :ST_START
- quoted = true
- state = :ST_DATA
- end
- elsif row_sep.nil? and c == ?\r
- if cr
- raise IllegalFormatError.new
- end
- if quoted
- cell.data << c.chr
- state = :ST_DATA
- else
- cr = true
- end
- elsif c == row_sep_end
- if state.equal?(:ST_DATA)
- if cr
- state = :ST_END
- result_state = :DT_ROWSEP
- cr = false
- else
- if quoted
- cell.data << c.chr
- state = :ST_DATA
- else
- state = :ST_END
- result_state = :DT_ROWSEP
- end
- end
- elsif state.equal?(:ST_QUOTE)
- state = :ST_END
- result_state = :DT_ROWSEP
- if cr
- cr = false
- end
- else # :ST_START
- cell.is_null = true
- state = :ST_END
- result_state = :DT_ROWSEP
- end
- else
- if state.equal?(:ST_DATA) || state.equal?(:ST_START)
- if cr
- raise IllegalFormatError.new
- end
- cell.data << c.chr
- state = :ST_DATA
- else # :ST_QUOTE
- raise IllegalFormatError.new
- end
- end
- if state.equal?(:ST_END)
- return result_state, idx;
- end
- end
- if state.equal?(:ST_START)
- cell.is_null = true
- elsif state.equal?(:ST_QUOTE)
- true # dummy for coverate; only a data
- elsif quoted
- raise IllegalFormatError.new
- elsif cr
- raise IllegalFormatError.new
- end
- return :DT_EOS, idx
- end
-
- def generate_body(cells, out_dev, col_sep, row_sep)
- row_data = cells.data.dup
- if (!cells.is_null)
- if (row_data.gsub!('"', '""') ||
- row_data.include?(col_sep) ||
- (row_sep && row_data.index(row_sep)) ||
- (/[\r\n]/ =~ row_data) ||
- (cells.data.empty?))
- out_dev << '"' << row_data << '"'
- else
- out_dev << row_data
- end
- end
- end
-
- def generate_separator(type, out_dev, col_sep, row_sep)
- case type
- when :DT_COLSEP
- out_dev << col_sep.chr
- when :DT_ROWSEP
- out_dev << (row_sep ? row_sep.chr : "\r\n")
- end
- end
- end
-
-
- # DESCRIPTION
- # CSV::StreamBuf -- a class for a bufferd stream.
- #
- # EXAMPLE 1 -- an IO.
- # class MyBuf < StreamBuf
- # # Do initialize myself before a super class. Super class might call my
- # # method 'read'. (Could be awful for C++ user. :-)
- # def initialize(s)
- # @s = s
- # super()
- # end
- #
- # # define my own 'read' method.
- # # CAUTION: Returning nil means EnfOfStream.
- # def read(size)
- # @s.read(size)
- # end
- #
- # # release buffers. in Ruby which has GC, you do not have to call this...
- # def terminate
- # @s = nil
- # super()
- # end
- # end
- #
- # buf = MyBuf.new(STDIN)
- # my_str = ''
- # p buf[0, 0] # => '' (null string)
- # p buf[0] # => 97 (char code of 'a')
- # p buf[0, 1] # => 'a'
- # my_str = buf[0, 5]
- # p my_str # => 'abcde' (5 chars)
- # p buf[0, 6] # => "abcde\n" (6 chars)
- # p buf[0, 7] # => "abcde\n" (6 chars)
- # p buf.drop(3) # => 3 (dropped chars)
- # p buf.get(0, 2) # => 'de' (2 chars)
- # p buf.is_eos? # => false (is not EOS here)
- # p buf.drop(5) # => 3 (dropped chars)
- # p buf.is_eos? # => true (is EOS here)
- # p buf[0] # => nil (is EOS here)
- #
- # EXAMPLE 2 -- String.
- # This is a conceptual example. No pros with this.
- #
- # class StrBuf < StreamBuf
- # def initialize(s)
- # @str = s
- # @idx = 0
- # super()
- # end
- #
- # def read(size)
- # str = @str[@idx, size]
- # @idx += str.size
- # str
- # end
- # end
- #
- class StreamBuf # pure virtual. (do not instanciate it directly)
- public
-
- # SYNOPSIS
- # char/str = CSV::StreamBuf#get(idx, n = nil)
- # char/str = CSV::StreamBuf#[idx, n = nil]
- #
- # ARGS
- # idx: index of a string to specify a start point of a string to get.
- # Unlike String instance, idx < 0 returns nil.
- # n: size of a string to get.
- #
- # RETURNS
- # char: if n == nil. A char at idx.
- # str: if n != nil. A partial string, from idx to (idx + size). At
- # EOF, the string size could not equal to arg n.
- #
- # DESCRIPTION
- # Get a char or a partial string from the stream.
- #
- def [](idx, n = nil)
- if idx < 0
- return nil
- end
- if (idx_is_eos?(idx))
- if n and (@offset + idx == buf_size(@cur_buf))
- # Like a String, 'abc'[4, 1] returns nil and
- # 'abc'[3, 1] returns '' not nil.
- return ''
- else
- return nil
- end
- end
- my_buf = @cur_buf
- my_offset = @offset
- next_idx = idx
- while (my_offset + next_idx >= buf_size(my_buf))
- if (my_buf == @buf_tail_idx)
- unless add_buf
- break
- end
- end
- next_idx = my_offset + next_idx - buf_size(my_buf)
- my_buf += 1
- my_offset = 0
- end
- loc = my_offset + next_idx
- if !n
- return @buf_list[my_buf][loc] # Fixnum of char code.
- elsif (loc + n - 1 < buf_size(my_buf))
- return @buf_list[my_buf][loc, n] # String.
- else # should do loop insted of (tail) recursive call...
- res = @buf_list[my_buf][loc, BufSize]
- size_added = buf_size(my_buf) - loc
- if size_added > 0
- idx += size_added
- n -= size_added
- ret = self[idx, n]
- if ret
- res << ret
- end
- end
- return res
- end
- end
- alias get []
-
- # SYNOPSIS
- # size_dropped = CSV::StreamBuf#drop(n)
- #
- # ARGS
- # n: drop size
- #
- # RETURNS
- # size_dropped: droped size. At EOF, dropped size might not equals to arg n.
- # 0 if n <= 0.
- #
- # DESCRIPTION
- # Drop a string from the stream. Once you drop the head of the stream,
- # access to the dropped part via [] or get returns nil.
- #
- def drop(n)
- if is_eos?
- return 0
- end
- size_dropped = 0
- while (n > 0)
- if (!@is_eos || (@cur_buf != @buf_tail_idx))
- if (@offset + n < buf_size(@cur_buf))
- size_dropped += n
- @offset += n
- n = 0
- else
- size = buf_size(@cur_buf) - @offset
- size_dropped += size
- n -= size
- @offset = 0
- unless rel_buf
- unless add_buf
- break
- end
- @cur_buf = @buf_tail_idx
- end
- end
- end
- end
- size_dropped
- end
-
- # SYNOPSIS
- # is_eos = CSV::StreamBuf#is_eos?
- #
- # RETURNS
- # is_eos: true if end of the stream or false.
- #
- # DESCRIPTION
- # Check EOF or not.
- #
- def is_eos?
- return idx_is_eos?(0)
- end
-
- # SYNOPSIS
- # N/A
- #
- # DESCRIPTION
- # Do not instanciate this class directly. Define your own class which
- # derives this class and define 'read' instance method.
- #
- def initialize
- @buf_list = []
- @cur_buf = @buf_tail_idx = -1
- @offset = 0
- @is_eos = false
- add_buf
- @cur_buf = @buf_tail_idx
- end
-
- protected
- def terminate
- while (rel_buf); end
- end
-
- # protected method 'read' must be defined in derived classes.
- # CAUTION: Returning a string which size is not equal to 'size' means
- # EnfOfStream. When it is not at EOS, you must block the callee, try to
- # read and return the sized string.
- def read(size) # raise EOFError
- raise NotImplementedError.new('Method read must be defined in a derived class.')
- end
-
- private
-
- def buf_size(idx)
- @buf_list[idx].size
- end
-
- def add_buf
- if @is_eos
- return false
- end
- begin
- str_read = read(BufSize)
- rescue EOFError
- str_read = nil
- rescue
- terminate
- raise
- end
- if str_read.nil?
- @is_eos = true
- @buf_list.push('')
- @buf_tail_idx += 1
- false
- else
- @buf_list.push(str_read)
- @buf_tail_idx += 1
- true
- end
- end
-
- def rel_buf
- if (@cur_buf < 0)
- return false
- end
- @buf_list[@cur_buf] = nil
- if (@cur_buf == @buf_tail_idx)
- @cur_buf = -1
- return false
- else
- @cur_buf += 1
- return true
- end
- end
-
- def idx_is_eos?(idx)
- (@is_eos && ((@cur_buf < 0) || (@cur_buf == @buf_tail_idx)))
- end
-
- BufSize = 1024 * 8
- end
-
- # DESCRIPTION
- # CSV::IOBuf -- a class for a bufferd IO.
- #
- # EXAMPLE
- # # File 'bigdata' could be a giga-byte size one!
- # buf = CSV::IOBuf.new(File.open('bigdata', 'rb'))
- # CSV::Reader.new(buf).each do |row|
- # p row
- # break if row[0].data == 'admin'
- # end
- #
- class IOBuf < StreamBuf
- public
- def initialize(s)
- @s = s
- super()
- end
-
- def close
- terminate
- end
-
- private
- def read(size)
- @s.read(size)
- end
-
- def terminate
- super()
- end
- end
-end