From 1c9d6dd646d508a3b4de0e84424283242708fe77 Mon Sep 17 00:00:00 2001 From: nahi Date: Thu, 27 May 2004 14:39:11 +0000 Subject: * lib/logger.rb: leading 0 padding of timestamp usec part. * lib/csv.rb (CSV.parse): [CAUTION] behavior changed. in the past, CSV.parse accepted a filename to be read-opened (it was just a shortcut of CSV.open(filename, 'r')). now CSV.parse accepts a string or a stream to be parsed e.g. CSV.parse("1,2\n3,4") #=> [['1', '2'], ['3', '4']] * lib/csv.rb: CSV::Row and CSV::Cell are deprecated. these classes will be removed in the future. in the new csv.rb, row is represented as just an Array. since CSV::Row was a subclass of Array, it won't hurt almost all programs except one which depended on CSV::Row#match. and a cell is represented as just a String or nil(NULL). this change will cause widespread destruction. CSV.open("foo.csv", "r") do |row| row.each do |cell| if cell.is_null # using Cell#is_null p "(NULL)" else p cell.data # using Cell#data end end end must become just: CSV.open("foo.csv", "r") do |row| row.each do |cell| if cell.nil? p "(NULL)" else p cell end end end * lib/csv.rb: [CAUTION] record separator(CR, LF, CR+LF) behavior change. CSV.open, CSV.parse, and CSV.generate now do not force opened file binmode. formerly it set binmode explicitly. with CSV.open, binmode of opened file depends on the given mode parameter "r", "w", "rb", and "wb". CSV.parse and CSV.generate open file with "r" and "w". setting mode properly is user's responsibility now. * lib/csv.rb: accepts String as a fs (field separator/column separator) and rs (record separator/row separator) * lib/csv.rb (CSV.read, CSV.readlines): added. works as IO.read and IO.readlines in CSV format. * lib/csv.rb: added CSV.foreach(path, rs = nil, &block). unlike IO.foreach, CSV.foreach does not handle "| cmd" as a path. needed? * test/csv/test_csv.rb: updated. * test/ruby/test_float.rb: added test_strtod to test Float("0"). 
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@6424 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/csv.rb | 694 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 341 insertions(+), 353 deletions(-) (limited to 'lib/csv.rb') diff --git a/lib/csv.rb b/lib/csv.rb index 3eb13192fe..f6c12fa285 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -1,110 +1,42 @@ # CSV -- module for generating/parsing CSV data. - +# Copyright (C) 2000-2004 NAKAMURA, Hiroshi . + # $Id$ - + # This program is copyrighted free software by NAKAMURA, Hiroshi. You can # redistribute it and/or modify it under the same terms of Ruby's license; # either the dual license version in 2003, or any later version. - - + + class CSV + class IllegalFormatError < RuntimeError; end - # Describes a cell of CSV. - class Cell - # Datum as string. - attr_accessor :data - - # Is this datum NULL? - attr_accessor :is_null - - # If is_null is true, datum is stored in the instance created but it - # should be treated as 'NULL'. - def initialize(data = '', is_null = true) - @data = data - @is_null = is_null - end - - # Compares another cell with self. Bear in mind NULL matches with NULL. - # Use CSV::Cell#== if you don't want NULL matches with NULL. - # rhs: an instance of CSV::Cell to be compared. - def match(rhs) - if @is_null and rhs.is_null - true - elsif @is_null or rhs.is_null - false - else - @data == rhs.data - end - end - - # Compares another cell with self. Bear in mind NULL does not match with - # NULL. Use CSV::Cell#match if you want NULL matches with NULL. - # rhs: an instance of CSV::Cell to be compared. - def ==(rhs) - if @is_null or rhs.is_null - false - else - @data == rhs.data - end - end - - def to_str - content.to_str - end - - def to_s - content.to_s + # deprecated + class Cell < String + def initialize(data = "", is_null = false) + super(is_null ? "" : data) end - private - - def content - @is_null ? 
nil : data + def data + to_s end end - - # Describes a row of CSV. Each element must be a CSV::Cell. + # deprecated class Row < Array - - # Returns the strings contained in the row's cells. - def to_a - self.collect { |cell| cell.is_null ? nil : cell.data } - end - - # Compares another row with self. - # rhs: an Array of cells. Each cell should be a CSV::Cell. - def match(rhs) - if self.size != rhs.size - return false - end - for idx in 0...(self.size) - unless self[idx].match(rhs[idx]) - return false - end - end - true - end end - - class IllegalFormatError < RuntimeError; end - - - def CSV.open(filename, mode, col_sep = ?,, row_sep = nil, &block) - if mode == 'r' or mode == 'rb' - open_reader(filename, col_sep, row_sep, &block) - elsif mode == 'w' or mode == 'wb' - open_writer(filename, col_sep, row_sep, &block) - else - raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'") - end - end - - # Open a CSV formatted file for reading. + # Open a CSV formatted file for reading or writing. + # + # For reading. # # EXAMPLE 1 - # reader = CSV.parse('csvfile.csv') + # CSV.open('csvfile.csv', 'r') do |row| + # p row + # end + # + # EXAMPLE 2 + # reader = CSV.open('csvfile.csv', 'r') # row1 = reader.shift # row2 = reader.shift # if row2.empty? @@ -112,11 +44,6 @@ class CSV # end # reader.close # - # EXAMPLE 2 - # CSV.parse('csvfile.csv') do |row| - # p row - # end - # # ARGS # filename: filename to parse. # col_sep: Column separator. ?, by default. If you want to separate @@ -127,24 +54,21 @@ class CSV # RETURNS # reader instance. To get parse result, see CSV::Reader#each. # - def CSV.parse(filename, col_sep = ?,, row_sep = nil, &block) - open_reader(filename, col_sep, row_sep, &block) - end - - # Open a CSV formatted file for writing. # - # EXAMPLE 1 - # writer = CSV.generate('csvfile.csv') - # writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil] - # writer.close + # For writing. 
# - # EXAMPLE 2 - # CSV.generate('csvfile.csv') do |writer| + # EXAMPLE 1 + # CSV.open('csvfile.csv', 'w') do |writer| # writer << ['r1c1', 'r1c2'] # writer << ['r2c1', 'r2c2'] # writer << [nil, nil] # end # + # EXAMPLE 2 + # writer = CSV.open('csvfile.csv', 'w') + # writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil] + # writer.close + # # ARGS # filename: filename to generate. # col_sep: Column separator. ?, by default. If you want to separate @@ -156,8 +80,52 @@ class CSV # writer instance. See CSV::Writer#<< and CSV::Writer#add_row to know how # to generate CSV string. # - def CSV.generate(filename, col_sep = ?,, row_sep = nil, &block) - open_writer(filename, col_sep, row_sep, &block) + def CSV.open(path, mode, fs = nil, rs = nil, &block) + if mode == 'r' or mode == 'rb' + open_reader(path, mode, fs, rs, &block) + elsif mode == 'w' or mode == 'wb' + open_writer(path, mode, fs, rs, &block) + else + raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'") + end + end + + def CSV.foreach(path, rs = nil, &block) + open_reader(path, 'r', ',', rs, &block) + end + + def CSV.read(path, length = nil, offset = nil) + CSV.parse(IO.read(path, length, offset)) + end + + def CSV.readlines(path, rs = nil) + reader = open_reader(path, 'r', ',', rs) + begin + reader.collect { |row| row } + ensure + reader.close + end + end + + def CSV.generate(path, fs = nil, rs = nil, &block) + open_writer(path, 'w', fs, rs, &block) + end + + # Parse lines from given string or stream. Return rows as an Array of Arrays. + def CSV.parse(str_or_readable, fs = nil, rs = nil, &block) + if File.exist?(str_or_readable) + STDERR.puts("CSV.parse(filename) is deprecated." + + " Use CSV.open(filename, 'r') instead.") + return open_reader(str_or_readable, 'r', fs, rs, &block) + end + if block + CSV::Reader.parse(str_or_readable, fs, rs) do |row| + yield(row) + end + nil + else + CSV::Reader.create(str_or_readable, fs, rs).collect { |row| row } + end end # Parse a line from given string. 
Bear in mind it parses ONE LINE. Rest of @@ -166,47 +134,54 @@ class CSV # # If you don't know whether a target string to parse is exactly 1 line or # not, use CSV.parse_row instead of this method. - def CSV.parse_line(src, col_sep = ?,, row_sep = nil) + def CSV.parse_line(src, fs = nil, rs = nil) + fs ||= ',' + if fs.is_a?(Fixnum) + fs = fs.chr + end + if !rs.nil? and rs.is_a?(Fixnum) + rs = rs.chr + end idx = 0 res_type = :DT_COLSEP - cells = Row.new + row = [] begin - while (res_type.equal?(:DT_COLSEP)) - cell = Cell.new - res_type, idx = parse_body(src, idx, cell, col_sep, row_sep) - cells.push(cell.is_null ? nil : cell.data) + while res_type == :DT_COLSEP + res_type, idx, cell = parse_body(src, idx, fs, rs) + row << cell end rescue IllegalFormatError - return Row.new + return [] end - cells + row end # Create a line from cells. each cell is stringified by to_s. - def CSV.generate_line(cells, col_sep = ?,, row_sep = nil) - if (cells.size == 0) + def CSV.generate_line(row, fs = nil, rs = nil) + if row.size == 0 return '' end + fs ||= ',' + if fs.is_a?(Fixnum) + fs = fs.chr + end + if !rs.nil? and rs.is_a?(Fixnum) + rs = rs.chr + end res_type = :DT_COLSEP result_str = '' idx = 0 while true - cell = if (cells[idx].nil?) - Cell.new('', true) - else - Cell.new(cells[idx].to_s, false) - end - generate_body(cell, result_str, col_sep, row_sep) + generate_body(row[idx], result_str, fs, rs) idx += 1 - if (idx == cells.size) + if (idx == row.size) break end - generate_separator(:DT_COLSEP, result_str, col_sep, row_sep) + generate_separator(:DT_COLSEP, result_str, fs, rs) end result_str end - - + # Parse a line from string. Consider using CSV.parse_line instead. # To parse lines in CSV string, see EXAMPLE below. # @@ -226,7 +201,7 @@ class CSV # src[](idx_out_of_bounds) must return nil. A String satisfies this # requirement. # idx: index of parsing location of 'src'. 0 origin. - # out_dev: buffer for parsed cells. Must respond '<<(CSV::Cell)'. 
+ # out_dev: buffer for parsed cells. Must respond '<<(aString)'. # col_sep: Column separator. ?, by default. If you want to separate # fields with semicolon, give ?; here. # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you @@ -236,16 +211,22 @@ class CSV # parsed_cells: num of parsed cells. # idx: index of next parsing location of 'src'. # - def CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil) + def CSV.parse_row(src, idx, out_dev, fs = nil, rs = nil) + fs ||= ',' + if fs.is_a?(Fixnum) + fs = fs.chr + end + if !rs.nil? and rs.is_a?(Fixnum) + rs = rs.chr + end idx_backup = idx parsed_cells = 0 res_type = :DT_COLSEP begin - while (!res_type.equal?(:DT_ROWSEP)) - cell = Cell.new - res_type, idx = parse_body(src, idx, cell, col_sep, row_sep) - if res_type.equal?(:DT_EOS) - if idx == idx_backup #((parsed_cells == 0) && (cell.is_null)) + while res_type != :DT_ROWSEP + res_type, idx, cell = parse_body(src, idx, fs, rs) + if res_type == :DT_EOS + if idx == idx_backup #((parsed_cells == 0) and cell.nil?) return 0, 0 end res_type = :DT_ROWSEP @@ -258,19 +239,14 @@ class CSV end return parsed_cells, idx end - - + # Convert a line from cells data to string. Consider using CSV.generate_line # instead. To generate multi-row CSV string, see EXAMPLE below. # # EXAMPLE - # def d(str) - # CSV::Cell.new(str, false) - # end - # - # row1 = [d('a'), d('b')] - # row2 = [d('c'), d('d')] - # row3 = [d('e'), d('f')] + # row1 = ['a', 'b'] + # row2 = ['c', 'd'] + # row3 = ['e', 'f'] # src = [row1, row2, row3] # buf = '' # src.each do |row| @@ -280,8 +256,8 @@ class CSV # p buf # # ARGS - # src: an Array of CSV::Cell to be converted to CSV string. Must respond to - # 'size' and '[](idx)'. src[idx] must return CSV::Cell. + # src: an Array of String to be converted to CSV string. Must respond to + # 'size' and '[](idx)'. src[idx] must return String. # cells: num of cells in a line. # out_dev: buffer for generated CSV string. Must respond to '<<(string)'. 
# col_sep: Column separator. ?, by default. If you want to separate @@ -292,39 +268,47 @@ class CSV # RETURNS # parsed_cells: num of converted cells. # - def CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil) + def CSV.generate_row(src, cells, out_dev, fs = nil, rs = nil) + fs ||= ',' + if fs.is_a?(Fixnum) + fs = fs.chr + end + if !rs.nil? and rs.is_a?(Fixnum) + rs = rs.chr + end src_size = src.size if (src_size == 0) if cells == 0 - generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep) + generate_separator(:DT_ROWSEP, out_dev, fs, rs) end return 0 end res_type = :DT_COLSEP parsed_cells = 0 - generate_body(src[parsed_cells], out_dev, col_sep, row_sep) + generate_body(src[parsed_cells], out_dev, fs, rs) parsed_cells += 1 - while ((parsed_cells < cells) && (parsed_cells != src_size)) - generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep) - generate_body(src[parsed_cells], out_dev, col_sep, row_sep) + while ((parsed_cells < cells) and (parsed_cells != src_size)) + generate_separator(:DT_COLSEP, out_dev, fs, rs) + generate_body(src[parsed_cells], out_dev, fs, rs) parsed_cells += 1 end if (parsed_cells == cells) - generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep) + generate_separator(:DT_ROWSEP, out_dev, fs, rs) else - generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep) + generate_separator(:DT_COLSEP, out_dev, fs, rs) end parsed_cells end - + + # Private class methods. 
class << self private - def open_reader(filename, col_sep, row_sep, &block) - file = File.open(filename, 'rb') + def open_reader(path, mode, fs, rs, &block) + file = File.open(path, mode) if block begin - CSV::Reader.parse(file, col_sep, row_sep) do |row| + CSV::Reader.parse(file, fs, rs) do |row| yield(row) end ensure @@ -332,17 +316,17 @@ class CSV end nil else - reader = CSV::Reader.create(file, col_sep, row_sep) + reader = CSV::Reader.create(file, fs, rs) reader.close_on_terminate reader end end - def open_writer(filename, col_sep, row_sep, &block) - file = File.open(filename, 'wb') + def open_writer(path, mode, fs, rs, &block) + file = File.open(path, mode) if block begin - CSV::Writer.generate(file, col_sep, row_sep) do |writer| + CSV::Writer.generate(file, fs, rs) do |writer| yield(writer) end ensure @@ -350,147 +334,177 @@ class CSV end nil else - writer = CSV::Writer.create(file, col_sep, row_sep) + writer = CSV::Writer.create(file, fs, rs) writer.close_on_terminate writer end end - def parse_body(src, idx, cell, col_sep, row_sep) - row_sep_end = row_sep || ?\n - cell.is_null = false + def parse_body(src, idx, fs, rs) + fs_str = fs + fs_size = fs_str.size + rs_str = rs || "\n" + rs_size = rs_str.size + fs_idx = rs_idx = 0 + cell = Cell.new state = :ST_START - quoted = false - cr = false + quoted = cr = false c = nil - while (c = src[idx]) - idx += 1 - result_state = :DT_UNKNOWN - if (c == col_sep) - if state.equal?(:ST_DATA) - if cr - raise IllegalFormatError.new + last_idx = idx + while c = src[idx] + unless quoted + fschar = (c == fs_str[fs_idx]) + rschar = (c == rs_str[rs_idx]) + # simple 1 char backtrack + if !fschar and c == fs_str[0] + fs_idx = 0 + fschar = true + if state == :ST_START + state = :ST_DATA + elsif state == :ST_QUOTE + raise IllegalFormatError end - if (!quoted) - state = :ST_END - result_state = :DT_COLSEP - else - cell.data << c.chr - end - elsif state.equal?(:ST_QUOTE) - if cr - raise IllegalFormatError.new - end - state = :ST_END - 
result_state = :DT_COLSEP - else # :ST_START - cell.is_null = true - state = :ST_END - result_state = :DT_COLSEP end - elsif (c == ?") # " for vim syntax hilighting. - if state.equal?(:ST_DATA) - if cr - raise IllegalFormatError.new + if !rschar and c == rs_str[0] + rs_idx = 0 + rschar = true + if state == :ST_START + state = :ST_DATA + elsif state == :ST_QUOTE + raise IllegalFormatError end + end + end + if c == ?" + fs_idx = rs_idx = 0 + if cr + raise IllegalFormatError + end + cell << src[last_idx, (idx - last_idx)] + last_idx = idx + if state == :ST_DATA if quoted + last_idx += 1 quoted = false state = :ST_QUOTE else - raise IllegalFormatError.new + raise IllegalFormatError end - elsif state.equal?(:ST_QUOTE) - cell.data << c.chr + elsif state == :ST_QUOTE + cell << c.chr + last_idx += 1 quoted = true state = :ST_DATA else # :ST_START quoted = true + last_idx += 1 state = :ST_DATA end - elsif row_sep.nil? and c == ?\r + elsif fschar or rschar + if fschar + fs_idx += 1 + end + if rschar + rs_idx += 1 + end + sep = nil + if fs_idx == fs_size + if state == :ST_START and rs_idx > 0 and fs_idx < rs_idx + state = :ST_DATA + end + cell << src[last_idx, (idx - last_idx - (fs_size - 1))] + last_idx = idx + fs_idx = rs_idx = 0 + if cr + raise IllegalFormatError + end + sep = :DT_COLSEP + elsif rs_idx == rs_size + if state == :ST_START and fs_idx > 0 and rs_idx < fs_idx + state = :ST_DATA + end + if !(rs.nil? and cr) + cell << src[last_idx, (idx - last_idx - (rs_size - 1))] + last_idx = idx + end + fs_idx = rs_idx = 0 + sep = :DT_ROWSEP + end + if sep + if state == :ST_DATA + return sep, idx + 1, cell; + elsif state == :ST_QUOTE + return sep, idx + 1, cell; + else # :ST_START + return sep, idx + 1, nil + end + end + elsif rs.nil? 
and c == ?\r + # special \r treatment for backward compatibility + fs_idx = rs_idx = 0 if cr - raise IllegalFormatError.new + raise IllegalFormatError end + cell << src[last_idx, (idx - last_idx)] + last_idx = idx if quoted - cell.data << c.chr state = :ST_DATA else cr = true end - elsif c == row_sep_end - if state.equal?(:ST_DATA) - if cr - state = :ST_END - result_state = :DT_ROWSEP - cr = false - else - if quoted - cell.data << c.chr - state = :ST_DATA - else - state = :ST_END - result_state = :DT_ROWSEP - end - end - elsif state.equal?(:ST_QUOTE) - state = :ST_END - result_state = :DT_ROWSEP - if cr - cr = false - end - else # :ST_START - cell.is_null = true - state = :ST_END - result_state = :DT_ROWSEP - end else - if state.equal?(:ST_DATA) || state.equal?(:ST_START) + fs_idx = rs_idx = 0 + if state == :ST_DATA or state == :ST_START if cr - raise IllegalFormatError.new + raise IllegalFormatError end - cell.data << c.chr state = :ST_DATA else # :ST_QUOTE - raise IllegalFormatError.new + raise IllegalFormatError end end - if state.equal?(:ST_END) - return result_state, idx; - end + idx += 1 end - if state.equal?(:ST_START) - cell.is_null = true - elsif state.equal?(:ST_QUOTE) - true # dummy for coverate; only a data + if state == :ST_START + if fs_idx > 0 or rs_idx > 0 + state = :ST_DATA + else + return :DT_EOS, idx, nil + end elsif quoted - raise IllegalFormatError.new + raise IllegalFormatError elsif cr - raise IllegalFormatError.new + raise IllegalFormatError end - return :DT_EOS, idx + cell << src[last_idx, (idx - last_idx)] + last_idx = idx + return :DT_EOS, idx, cell end - - def generate_body(cells, out_dev, col_sep, row_sep) - row_data = cells.data.dup - if (!cells.is_null) - if (row_data.gsub!('"', '""') || - row_data.include?(col_sep) || - (row_sep && row_data.index(row_sep)) || - (/[\r\n]/ =~ row_data) || - (cells.data.empty?)) + + def generate_body(cell, out_dev, fs, rs) + if cell.nil? 
+ # empty + else + cell = cell.to_s + row_data = cell.dup + if (row_data.gsub!('"', '""') or + row_data.index(fs) or + (rs and row_data.index(rs)) or + (/[\r\n]/ =~ row_data) or + (cell.empty?)) out_dev << '"' << row_data << '"' else out_dev << row_data end end end - - def generate_separator(type, out_dev, col_sep, row_sep) + + def generate_separator(type, out_dev, fs, rs) case type when :DT_COLSEP - out_dev << col_sep.chr + out_dev << fs when :DT_ROWSEP - out_dev << (row_sep ? row_sep.chr : "\r\n") + out_dev << (rs || "\n") end end end @@ -499,7 +513,7 @@ class CSV # CSV formatted string/stream reader. # # EXAMPLE - # read CSV lines until the first column is 'stop'. + # read CSV lines untill the first column is 'stop'. # # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row| # p row @@ -511,30 +525,34 @@ class CSV # Parse CSV data and get lines. Given block is called for each parsed row. # Block value is always nil. Rows are not cached for performance reason. - def Reader.parse(str_or_readable, col_sep = ?,, row_sep = nil) - reader = create(str_or_readable, col_sep, row_sep) - reader.each do |row| - yield(row) + def Reader.parse(str_or_readable, fs = ',', rs = nil, &block) + reader = Reader.create(str_or_readable, fs, rs) + if block + reader.each do |row| + yield(row) + end + reader.close + nil + else + reader end - reader.close - nil end # Returns reader instance. 
- def Reader.create(str_or_readable, col_sep = ?,, row_sep = nil) + def Reader.create(str_or_readable, fs = ',', rs = nil) case str_or_readable when IO - IOReader.new(str_or_readable, col_sep, row_sep) + IOReader.new(str_or_readable, fs, rs) when String - StringReader.new(str_or_readable, col_sep, row_sep) + StringReader.new(str_or_readable, fs, rs) else - IOReader.new(str_or_readable, col_sep, row_sep) + IOReader.new(str_or_readable, fs, rs) end end def each while true - row = Row.new + row = [] parsed_cells = get_row(row) if parsed_cells == 0 break @@ -545,7 +563,7 @@ class CSV end def shift - row = Row.new + row = [] parsed_cells = get_row(row) row end @@ -557,25 +575,23 @@ class CSV private def initialize(dev) - raise RuntimeError.new('do not instantiate this class directly') + raise RuntimeError.new('Do not instanciate this class directly.') end def get_row(row) - raise NotImplementedError.new( - 'method get_row must be defined in a derived class') + raise NotImplementedError.new('Method get_row must be defined in a derived class.') end def terminate # Define if needed. 
end end - + class StringReader < Reader - - def initialize(string, col_sep = ?,, row_sep = nil) - @col_sep = col_sep - @row_sep = row_sep + def initialize(string, fs = ',', rs = nil) + @fs = fs + @rs = rs @dev = string @idx = 0 if @dev[0, 3] == "\xef\xbb\xbf" @@ -586,9 +602,8 @@ class CSV private def get_row(row) - parsed_cells, next_idx = - CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep) - if parsed_cells == 0 && next_idx == 0 && @idx != @dev.size + parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs) + if parsed_cells == 0 and next_idx == 0 and @idx != @dev.size raise IllegalFormatError.new end @idx = next_idx @@ -598,12 +613,10 @@ class CSV class IOReader < Reader - - def initialize(io, col_sep = ?,, row_sep = nil) + def initialize(io, fs = ',', rs = nil) @io = io - @io.binmode if @io.respond_to?(:binmode) - @col_sep = col_sep - @row_sep = row_sep + @fs = fs + @rs = rs @dev = CSV::IOBuf.new(@io) @idx = 0 if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf @@ -621,9 +634,8 @@ class CSV private def get_row(row) - parsed_cells, next_idx = - CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep) - if parsed_cells == 0 && next_idx == 0 && !@dev.is_eos? + parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs) + if parsed_cells == 0 and next_idx == 0 and !@dev.is_eos? raise IllegalFormatError.new end dropped = @dev.drop(next_idx) @@ -651,56 +663,36 @@ class CSV # outfile = File.open('csvout', 'wb') # CSV::Writer.generate(outfile) do |csv| # csv << ['c1', nil, '', '"', "\r\n", 'c2'] - # # or - # csv.add_row [ - # CSV::Cell.new('c1', false), - # CSV::Cell.new('dummy', true), - # CSV::Cell.new('', false), - # CSV::Cell.new('"', false), - # CSV::Cell.new("\r\n", false) - # CSV::Cell.new('c2', false) - # ] - # ... # ... # end # # outfile.close # class Writer - - # Generate CSV. Given block is called with the writer instance. 
- def Writer.generate(str_or_writable, col_sep = ?,, row_sep = nil) - writer = Writer.create(str_or_writable, col_sep, row_sep) - yield(writer) - writer.close - nil + # Given block is called with the writer instance. str_or_writable must + # handle '<<(string)'. + def Writer.generate(str_or_writable, fs = ',', rs = nil, &block) + writer = Writer.create(str_or_writable, fs, rs) + if block + yield(writer) + writer.close + nil + else + writer + end end # str_or_writable must handle '<<(string)'. - def Writer.create(str_or_writable, col_sep = ?,, row_sep = nil) - BasicWriter.new(str_or_writable, col_sep, row_sep) + def Writer.create(str_or_writable, fs = ',', rs = nil) + BasicWriter.new(str_or_writable, fs, rs) end # dump CSV stream to the device. argument must be an Array of String. - def <<(ary) - row = ary.collect { |item| - if item.is_a?(Cell) - item - elsif (item.nil?) - Cell.new('', true) - else - Cell.new(item.to_s, false) - end - } - CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep) - self - end - - # dump CSV stream to the device. argument must be an Array of CSV::Cell. - def add_row(row) - CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep) + def <<(row) + CSV.generate_row(row, row.size, @dev, @fs, @rs) self end + alias add_row << def close terminate @@ -709,7 +701,7 @@ class CSV private def initialize(dev) - raise RuntimeError.new('do not instantiate this class directly') + raise RuntimeError.new('Do not instanciate this class directly.') end def terminate @@ -719,12 +711,10 @@ class CSV class BasicWriter < Writer - - def initialize(str_or_writable, col_sep = ?,, row_sep = nil) - @col_sep = col_sep - @row_sep = row_sep + def initialize(str_or_writable, fs = ',', rs = nil) + @fs = fs + @rs = rs @dev = str_or_writable - @dev.binmode if @dev.respond_to?(:binmode) @close_on_terminate = false end @@ -743,6 +733,7 @@ class CSV end end +private # Buffered stream. # @@ -756,7 +747,7 @@ class CSV # end # # # define my own 'read' method. 
- # # CAUTION: Returning nil means EndOfStream. + # # CAUTION: Returning nil means EnfOfStream. # def read(size) # @s.read(size) # end @@ -801,8 +792,7 @@ class CSV # end # end # - class StreamBuf # pure virtual. (do not instantiate it directly) - + class StreamBuf # get a char or a partial string from the stream. # idx: index of a string to specify a start point of a string to get. # unlike String instance, idx < 0 returns nil. @@ -810,7 +800,7 @@ class CSV # returns char at idx if n == nil. # returns a partial string, from idx to (idx + n) if n != nil. at EOF, # the string size could not equal to arg n. - def [](idx, n = nil) + def [](idx, n = nil) if idx < 0 return nil end @@ -838,11 +828,11 @@ class CSV end loc = my_offset + next_idx if !n - return @buf_list[my_buf][loc] # Fixnum of char code. + return @buf_list[my_buf][loc] # Fixnum of char code. elsif (loc + n - 1 < buf_size(my_buf)) - return @buf_list[my_buf][loc, n] # String. + return @buf_list[my_buf][loc, n] # String. else # should do loop insted of (tail) recursive call... - res = @buf_list[my_buf][loc, BufSize] + res = @buf_list[my_buf][loc, BufSize] size_added = buf_size(my_buf) - loc if size_added > 0 idx += size_added @@ -856,7 +846,7 @@ class CSV end end alias get [] - + # drop a string from the stream. # returns dropped size. at EOF, dropped size might not equals to arg n. # Once you drop the head of the stream, access to the dropped part via [] @@ -867,7 +857,7 @@ class CSV end size_dropped = 0 while (n > 0) - if (!@is_eos || (@cur_buf != @buf_tail_idx)) + if !@is_eos or (@cur_buf != @buf_tail_idx) if (@offset + n < buf_size(@cur_buf)) size_dropped += n @offset += n @@ -888,11 +878,11 @@ class CSV end size_dropped end - + def is_eos? return idx_is_eos?(0) end - + # WARN: Do not instantiate this class directly. Define your own class # which derives this class and define 'read' instance method. 
def initialize @@ -903,24 +893,23 @@ class CSV add_buf @cur_buf = @buf_tail_idx end - + protected def terminate while (rel_buf); end end - + # protected method 'read' must be defined in derived classes. # CAUTION: Returning a string which size is not equal to 'size' means - # EndOfStream. When it is not at EOS, you must block the callee, try to + # EnfOfStream. When it is not at EOS, you must block the callee, try to # read and return the sized string. def read(size) # raise EOFError - raise NotImplementedError.new( - 'method read must be defined in a derived class') + raise NotImplementedError.new('Method read must be defined in a derived class.') end - + private - + def buf_size(idx) @buf_list[idx].size end @@ -948,7 +937,7 @@ class CSV true end end - + def rel_buf if (@cur_buf < 0) return false @@ -962,15 +951,14 @@ class CSV return true end end - + def idx_is_eos?(idx) - (@is_eos && ((@cur_buf < 0) || (@cur_buf == @buf_tail_idx))) + (@is_eos and ((@cur_buf < 0) or (@cur_buf == @buf_tail_idx))) end - + BufSize = 1024 * 8 end - # Buffered IO. # # EXAMPLE @@ -986,7 +974,7 @@ class CSV @s = s super() end - + def close terminate end @@ -996,7 +984,7 @@ class CSV def read(size) @s.read(size) end - + def terminate super() end -- cgit v1.2.3