From dd03a2ff20bf146885066435e0cfafd3fe70d757 Mon Sep 17 00:00:00 2001 From: nahi Date: Mon, 16 Feb 2004 14:41:49 +0000 Subject: * lib/csv.rb: document reduction. [ruby-core:02429] * test/yaml/test_yaml.rb: added 0..1 test with "0".."1" on display. it should be defined that the specification about what kind of Range is supported in ruby's custom type in YAML. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5731 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 8 + lib/csv.rb | 1212 +++++++++++++++++------------------------------- test/yaml/test_yaml.rb | 2 + 3 files changed, 442 insertions(+), 780 deletions(-) diff --git a/ChangeLog b/ChangeLog index bffeb3613d..3efda1f7bc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +Mon Feb 16 23:28:14 2004 NAKAMURA, Hiroshi + + * lib/csv.rb: document reduction. [ruby-core:02429] + + * test/yaml/test_yaml.rb: added 0..1 test with "0".."1" on display. + it should be defined that the specification about what kind of Range + is supported in ruby's custom type in YAML. + Mon Feb 16 22:22:00 2004 Gavin Sinclair * lib/generator.rb: corrected doc format diff --git a/lib/csv.rb b/lib/csv.rb index cfc5489ea0..205c3385c4 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -1,59 +1,32 @@ # CSV -- module for generating/parsing CSV data. - + # $Id$ - -# This module is copyrighted free software by NAKAMURA, Hiroshi. -# You can redistribute it and/or modify it under the same term as Ruby. - - + +# This program is copyrighted free software by NAKAMURA, Hiroshi. You can +# redistribute it and/or modify it under the same terms of Ruby's license; +# either the dual license version in 2003, or any later version. + + class CSV -public - # DESCRIPTION - # CSV::Cell -- Describes 1 cell of CSV. - # + # Describes a cell of CSV. class Cell - public - # Datum as string. attr_accessor :data - - # Is this datum null? + + # Is this datum NULL? attr_accessor :is_null - # SYNOPSIS - # cell = CSV::Cell.new(data = '', is_null = true) - # - # ARGS - # data: datum as String - # is_null: is this datum null? - # - # RETURNS - # cell: Created instance. - # - # DESCRIPTION - # Create instance. If is_null is true, datum is stored in the instance - # created but it should be treated as 'NULL'. - # + # If is_null is true, datum is stored in the instance created but it + # should be treated as 'NULL'. def initialize(data = '', is_null = true) @data = data @is_null = is_null end - # SYNOPSIS - # CSV::Cell#match(rhs) - # - # ARGS - # rhs: an instance of CSV::Cell to be compared. - # - # RETURNS - # true/false. See the souce if you want to know matching algorithm. - # - # DESCRIPTION - # Compare another cell with me. Bare in mind Null matches with Null - # using this method. Use CSV::Cell#== if you want Null never matches - # with other data including Null. - # + # Compares another cell with self. Bear in mind NULL matches with NULL. + # Use CSV::Cell#== if you don't want NULL matches with NULL. + # rhs: an instance of CSV::Cell to be compared. def match(rhs) if @is_null and rhs.is_null true @@ -64,20 +37,9 @@ public end end - # SYNOPSIS - # CSV::Cell#==(rhs) - # - # ARGS - # rhs: an instance of CSV::Cell to be compared. - # - # RETURNS - # true/false. See the souce if you want to know matching algorithm. - # - # DESCRIPTION - # Compare another cell with me. Bare in mind Null is not match with - # Null using this method. Null never matches with other data including - # Null. Use CSV::Cell#match if you want Null matches with Null. - # + # Compares another cell with self. Bear in mind NULL does not match with + # NULL. Use CSV::Cell#match if you want NULL matches with NULL. + # rhs: an instance of CSV::Cell to be compared. def ==(rhs) if @is_null or rhs.is_null false @@ -100,39 +62,18 @@ public @is_null ? nil : data end end - - # DESCRIPTION - # CSV::Row -- Describes a row of CSV. Each element must be a CSV::Cell. - # + + # Describes a row of CSV. Each element must be a CSV::Cell. class Row < Array - public - - # SYNOPSIS - # CSV::Row#to_a - # - # RETURNS - # An Array of String. - # - # DESCRIPTION - # Convert CSV::Cell to String. Null is converted to nil. - # + + # Returns the strings contained in the row's cells. def to_a self.collect { |cell| cell.is_null ? nil : cell.data } end - # SYNOPSIS - # CSV::Row#match(rhs) - # - # ARGS - # rhs: an Array of cells. Each cell is a instance of CSV::Cell. - # - # RETURNS - # true/false. See the souce if you want to know matching algorithm. - # - # DESCRIPTION - # Compare another row with me. - # + # Compares another row with self. + # rhs: an Array of cells. Each cell should be a CSV::Cell. def match(rhs) if self.size != rhs.size return false @@ -147,62 +88,9 @@ public end - # SYNOPSIS - # 1. reader = CSV.open(filename, 'r') - # - # 2. CSV.open(filename, 'r') do |row| - # ... - # end - # - # 3. writer = CSV.open(filename, 'w') - # - # 4. CSV.open(filename, 'w') do |writer| - # ... - # end - # - # ARGS - # filename: filename to open. - # mode: 'r' for read (parse) - # 'w' for write (generate) - # row: an Array of cells which is a parsed line. - # writer: Created writer instance. See CSV::Writer#<< and - # CSV::Writer#add_row to know how to generate CSV string. - # - # RETURNS - # reader: Create reader instance. To get parse result, see - # CSV::Reader#each. - # writer: Created writer instance. See CSV::Writer#<< and - # CSV::Writer#add_row to know how to generate CSV string. - # - # DESCRIPTION - # Open a CSV formatted file to read or write. - # - # EXAMPLE 1 - # reader = CSV.open('csvfile.csv', 'r') - # row1 = reader.shift - # row2 = reader.shift - # if row2.empty? - # p 'row2 not find.' - # end - # reader.close - # - # EXAMPLE 2 - # CSV.open('csvfile.csv', 'r') do |row| - # p row - # end - # - # EXAMPLE 3 - # writer = CSV.open('csvfile.csv', 'w') - # writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil] - # writer.close - # - # EXAMPLE 4 - # CSV.open('csvfile.csv', 'w') do |writer| - # writer << ['r1c1', 'r1c2'] - # writer << ['r2c1', 'r2c2'] - # writer << [nil, nil] - # end - # + class IllegalFormatError < RuntimeError; end + + def CSV.open(filename, mode, col_sep = ?,, row_sep = nil, &block) if mode == 'r' or mode == 'rb' open_reader(filename, col_sep, row_sep, &block) @@ -213,530 +101,71 @@ public end end - def CSV.parse(filename, col_sep = ?,, row_sep = nil, &block) - open_reader(filename, col_sep, row_sep, &block) - end - - def CSV.generate(filename, col_sep = ?,, row_sep = nil, &block) - open_writer(filename, col_sep, row_sep, &block) - end - - # Private class methods. - class << self - private - def open_reader(filename, col_sep, row_sep, &block) - file = File.open(filename, 'rb') - if block - begin - CSV::Reader.parse(file, col_sep, row_sep) do |row| - yield(row) - end - ensure - file.close - end - nil - else - reader = CSV::Reader.create(file, col_sep, row_sep) - reader.close_on_terminate - reader - end - end - - def open_writer(filename, col_sep, row_sep, &block) - file = File.open(filename, 'wb') - if block - begin - CSV::Writer.generate(file, col_sep, row_sep) do |writer| - yield(writer) - end - ensure - file.close - end - nil - else - writer = CSV::Writer.create(file, col_sep, row_sep) - writer.close_on_terminate - writer - end - end - end - - - # DESCRIPTION - # CSV::Reader -- CSV formatted string/stream reader. - # - # EXAMPLE - # Read CSV lines untill the first column is 'stop'. + # Open a CSV formatted file for reading. # - # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row| - # p row - # break if !row[0].is_null && row[0].data == 'stop' + # EXAMPLE 1 + # reader = CSV.parse('csvfile.csv') + # row1 = reader.shift + # row2 = reader.shift + # if row2.empty? + # p 'row2 not find.' # end + # reader.close # - class Reader - include Enumerable - public - - # SYNOPSIS - # reader = CSV::Reader.create(str_or_readable) - # - # ARGS - # str_or_readable: a CSV data to be parsed. A String or an IO. - # - # RETURNS - # reader: Created instance. - # - # DESCRIPTION - # Create instance. To get parse result, see CSV::Reader#each. - # - def Reader.create(str_or_readable, col_sep = ?,, row_sep = nil) - case str_or_readable - when IO - IOReader.new(str_or_readable, col_sep, row_sep) - when String - StringReader.new(str_or_readable, col_sep, row_sep) - else - IOReader.new(str_or_readable, col_sep, row_sep) - end - end - - # SYNOPSIS - # CSV::Reader.parse(str_or_readable) do |row| - # ... - # end - # - # ARGS - # str_or_readable: a CSV data to be parsed. A String or an IO. - # row: a CSV::Row; an Array of a CSV::Cell in a line. - # - # RETURNS - # nil - # - # DESCRIPTION - # Parse CSV data and get lines. Caller block is called for each line - # with an argument which is a chunk of cells in a row. - # - # Block value is always nil. Rows are not cached for performance - # reason. - # - def Reader.parse(str_or_readable, col_sep = ?,, row_sep = nil) - reader = create(str_or_readable, col_sep, row_sep) - reader.each do |row| - yield(row) - end - reader.close - nil - end - - # SYNOPSIS - # CSV::Reader#each do |row| - # ... - # end - # - # ARGS - # row: a CSV::Row; an Array of a CSV::Cell in a line. - # - # RETURNS - # nil - # - # DESCRIPTION - # Caller block is called for each line with an argument which is a chunk - # of cells in a row. - # - # Block value is always nil. Rows are not cached for performance - # reason. - # - def each - while true - row = Row.new - parsed_cells = get_row(row) - if parsed_cells == 0 - break - end - yield(row) - end - nil - end - - # SYNOPSIS - # cell = CSV::Reader#shift - # - # RETURNS - # cell: a CSV::Row; an Array of a CSV::Cell. - # - # DESCRIPTION - # Extract cells of next line. - # - def shift - row = Row.new - parsed_cells = get_row(row) - row - end - - # SYNOPSIS - # CSV::Reader#close - # - # RETURNS - # nil - # - # DESCRIPTION - # Close this reader. - # - def close - terminate - end - - private - def initialize(dev) - raise RuntimeError.new('Do not instanciate this class directly.') - end - - def get_row(row) - raise NotImplementedError.new('Method get_row must be defined in a derived class.') - end - - def terminate - # Define if needed. - end - end - - - # DESCRIPTION - # CSV::StringReader -- CSV formatted stream reader. - # - # EXAMPLE - # Read CSV lines untill the first column is 'stop'. - # - # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row| + # EXAMPLE 2 + # CSV.parse('csvfile.csv') do |row| # p row - # break if !row[0].is_null && row[0].data == 'stop' # end # - class StringReader < Reader - public - - # SYNOPSIS - # reader = CSV::StringReader.new(string) - # - # ARGS - # string: a CSV String to be parsed. - # - # RETURNS - # reader: Created instance. - # - # DESCRIPTION - # Create instance. To get parse result, see CSV::Reader#each. - # - def initialize(string, col_sep = ?,, row_sep = nil) - @col_sep = col_sep - @row_sep = row_sep - @dev = string - @idx = 0 - if @dev[0, 3] == "\xef\xbb\xbf" - @idx += 3 - end - end - - private - def get_row(row) - parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep) - if parsed_cells == 0 && next_idx == 0 && @idx != @dev.size - raise IllegalFormatError.new - end - @idx = next_idx - parsed_cells - end - end - - - # DESCRIPTION - # CSV::IOReader -- CSV formatted stream reader. - # - # EXAMPLE - # Read CSV lines untill the first column is 'stop'. + # ARGS + # filename: filename to parse. + # col_sep: Column separator. ?, by default. If you want to separate + # fields with semicolon, give ?; here. + # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you + # want to separate records with \r, give ?\r here. # - # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row| - # p row - # break if !row[0].is_null && row[0].data == 'stop' - # end + # RETURNS + # reader instance. To get parse result, see CSV::Reader#each. # - class IOReader < Reader - public - - # SYNOPSIS - # reader = CSV::IOReader.new(io) - # - # ARGS - # io: a CSV data to be parsed. Must be an IO. (io#read is called.) - # - # RETURNS - # reader: Created instance. - # - # DESCRIPTION - # Create instance. To get parse result, see CSV::Reader#each. - # - def initialize(io, col_sep = ?,, row_sep = nil) - @io = io - @io.binmode if @io.respond_to?(:binmode) - @col_sep = col_sep - @row_sep = row_sep - @dev = CSV::IOBuf.new(@io) - @idx = 0 - if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf - @idx += 3 - end - @close_on_terminate = false - end - - # SYNOPSIS - # CSV::IOReader#close_on_terminate - # - # RETURNS - # true - # - # DESCRIPTION - # Tell this reader to close the IO when terminated (Triggered by invoking - # CSV::IOReader#close). - # - def close_on_terminate - @close_on_terminate = true - end - - private - def get_row(row) - parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep) - if parsed_cells == 0 && next_idx == 0 && !@dev.is_eos? - raise IllegalFormatError.new - end - dropped = @dev.drop(next_idx) - @idx = next_idx - dropped - parsed_cells - end - - def terminate - if @close_on_terminate - @io.close - end - - if @dev - @dev.close - end - end + def CSV.parse(filename, col_sep = ?,, row_sep = nil, &block) + open_reader(filename, col_sep, row_sep, &block) end - - # DESCRIPTION - # CSV::Writer -- CSV formatted string/stream writer. - # - # EXAMPLE - # Write rows to 'csvout' file. + # Open a CSV formatted file for writing. # - # outfile = File.open('csvout', 'wb') - # CSV::Writer.generate(outfile) do |csv| - # csv << ['c1', nil, '', '"', "\r\n", 'c2'] - # # or - # csv.add_row [ - # CSV::Cell.new('c1', false), - # CSV::Cell.new('dummy', true), - # CSV::Cell.new('', false), - # CSV::Cell.new('"', false), - # CSV::Cell.new("\r\n", false) - # CSV::Cell.new('c2', false) - # ] - # ... - # ... - # end - # - # outfile.close + # EXAMPLE 1 + # writer = CSV.generate('csvfile.csv') + # writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil] + # writer.close # - class Writer - public - - # SYNOPSIS - # writer = CSV::Writer.create(str_or_readable) - # - # ARGS - # str_or_writable: device for generated CSV string. Must respond to - # '<<(string)'. - # - # RETURNS - # writer: Created instance. - # - # DESCRIPTION - # Create instance. To add CSV data to generate CSV string, see - # CSV::Writer#<< or CSV::Writer#add_row. - # - def Writer.create(str_or_writable, col_sep = ?,, row_sep = nil) - BasicWriter.new(str_or_writable, col_sep, row_sep) - end - - # SYNOPSIS - # CSV::Writer.generate(str_or_writable) do |writer| - # ... - # end - # - # ARGS - # str_or_writable: device for generated CSV string. Must respond to - # '<<(string)'. - # writer: Created writer instance. See CSV::Writer#<< and - # CSV::Writer#add_row to know how to generate CSV string. - # - # RETURNS - # nil - # - # DESCRIPTION - # Create writer instance. Caller block is called with the new instance. - # To add CSV data to generate CSV string, see CSV::Writer#<< or - # CSV::Writer#add_row. - # - def Writer.generate(str_or_writable, col_sep = ?,, row_sep = nil) - writer = Writer.create(str_or_writable, col_sep, row_sep) - yield(writer) - writer.close - nil - end - - # SYNOPSIS - # CSV::Writer#<<(row) - # - # ARGS - # row: an Array of a String. - # - # RETURNS - # self - # - # DESCRIPTION - # Dump CSV stream to the device. Argument is an array of a String like - # ['c1', 'c2', 'c3']. - # - def <<(ary) - row = ary.collect { |item| - if item.is_a?(Cell) - item - elsif (item.nil?) - Cell.new('', true) - else - Cell.new(item.to_s, false) - end - } - CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep) - self - end - - # SYNOPSIS - # CSV::Writer#<<(row) - # - # ARGS - # row: an Array of a CSV::Cell. - # - # RETURNS - # self - # - # DESCRIPTION - # Dump CSV stream to the device. Argument is an array of a CSV::Cell - # like [CSV::Cell.new('c1', false), CSV::Cell.new('dummy', true)]. - # (Formar is 'c1' and latter is Null.) - # - def add_row(row) - CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep) - self - end - - # SYNOPSIS - # CSV::Writer#close - # - # RETURNS - # nil - # - # DESCRIPTION - # Close this writer. - # - def close - terminate - end - - private - def initialize(dev) - raise RuntimeError.new('Do not instanciate this class directly.') - end - - def terminate - # Define if needed. - end - end - - - # DESCRIPTION - # CSV::BasicWriter -- CSV formatted string/stream writer using <<. - # - class BasicWriter < Writer - public - - # SYNOPSIS - # writer = CSV::BasicWriter.new(str_or_writable) - # - # ARGS - # str_or_writable: device for generated CSV string. Must respond to - # '<<(string)'. - # - # RETURNS - # writer: Created instance. - # - # DESCRIPTION - # Create instance. To add CSV data to generate CSV string, see - # CSV::Writer#<< or CSV::Writer#add_row. - # - def initialize(str_or_writable, col_sep = ?,, row_sep = nil) - @col_sep = col_sep - @row_sep = row_sep - @dev = str_or_writable - @dev.binmode if @dev.respond_to?(:binmode) - @close_on_terminate = false - end - - # SYNOPSIS - # CSV::BasicWriter#close_on_terminate - # - # RETURNS - # true - # - # DESCRIPTION - # Tell this writer to close the IO when terminated (Triggered by invoking - # CSV::BasicWriter#close). - # - def close_on_terminate - @close_on_terminate = true - end - - private - def terminate - if @close_on_terminate - @dev.close - end - end - end - - # SYNOPSIS - # cells = CSV.parse_line(src, col_sep = ?,, row_sep = nil) + # EXAMPLE 2 + # CSV.generate('csvfile.csv') do |writer| + # writer << ['r1c1', 'r1c2'] + # writer << ['r2c1', 'r2c2'] + # writer << [nil, nil] + # end # # ARGS - # src: a CSV String. + # filename: filename to generate. # col_sep: Column separator. ?, by default. If you want to separate # fields with semicolon, give ?; here. # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you # want to separate records with \r, give ?\r here. # # RETURNS - # cells: an Array of parsed cells in first line. Each cell is a String. + # writer instance. See CSV::Writer#<< and CSV::Writer#add_row to know how + # to generate CSV string. # - # DESCRIPTION - # Parse one line from given string. Bare in mind it parses ONE LINE. Rest - # of the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the - # second line 'c,d' is ignored. + def CSV.generate(filename, col_sep = ?,, row_sep = nil, &block) + open_writer(filename, col_sep, row_sep, &block) + end + + # Parse a line from given string. Bear in mind it parses ONE LINE. Rest of + # the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the + # second line 'c,d' is ignored. # - # If you don't know whether a target string to parse is exactly 1 line or - # not, use CSV.parse_row instead of this method. - # + # If you don't know whether a target string to parse is exactly 1 line or + # not, use CSV.parse_row instead of this method. def CSV.parse_line(src, col_sep = ?,, row_sep = nil) idx = 0 res_type = :DT_COLSEP @@ -752,25 +181,8 @@ public end cells end - - # SYNOPSIS - # str = CSV.generate_line(cells, col_sep = ?,, row_sep = nil) - # - # ARGS - # cells: an Array of cell to be converted to CSV string. Each cell must - # respond to 'to_s'. - # col_sep: Column separator. ?, by default. If you want to separate - # fields with semicolon, give ?; here. - # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you - # want to separate records with \r, give ?\r here. - # - # RETURNS - # str: a String of generated CSV string. - # - # DESCRIPTION - # Create a line from cells. Each cell is stringified by to_s. - # + # Create a line from cells. each cell is stringified by to_s. def CSV.generate_line(cells, col_sep = ?,, row_sep = nil) if (cells.size == 0) return '' @@ -793,9 +205,20 @@ public end result_str end - - # SYNOPSIS - # parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil) + + + # Parse a line from string. Consider using CSV.parse_line instead. + # To parse lines in CSV string, see EXAMPLE below. + # + # EXAMPLE + # src = "a,b\r\nc,d\r\ne,f" + # idx = 0 + # begin + # parsed = [] + # parsed_cells, idx = CSV.parse_row(src, idx, parsed) + # puts "Parsed #{ parsed_cells } cells." + # p parsed + # end while parsed_cells > 0 # # ARGS # src: a CSV data to be parsed. Must respond '[](idx)'. @@ -813,20 +236,6 @@ public # parsed_cells: num of parsed cells. # idx: index of next parsing location of 'src'. # - # DESCRIPTION - # Parse a line from string. To parse lines in CSV string, see EXAMPLE - # below. - # - # EXAMPLE - # src = "a,b\r\nc,d\r\ne,f" - # idx = 0 - # begin - # parsed = [] - # parsed_cells, idx = CSV.parse_row(src, idx, parsed) - # puts "Parsed #{ parsed_cells } cells." - # p parsed - # end while parsed_cells > 0 - # def CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil) idx_backup = idx parsed_cells = 0 @@ -849,26 +258,10 @@ public end return parsed_cells, idx end - - # SYNOPSIS - # parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil) - # - # ARGS - # src: an Array of CSV::Cell to be converted to CSV string. Must respond to - # 'size' and '[](idx)'. src[idx] must return CSV::Cell. - # cells: num of cells in a line. - # out_dev: buffer for generated CSV string. Must respond to '<<(string)'. - # col_sep: Column separator. ?, by default. If you want to separate - # fields with semicolon, give ?; here. - # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you - # want to separate records with \r, give ?\r here. - # - # RETURNS - # parsed_cells: num of converted cells. - # - # DESCRIPTION - # Convert a line from cells data to string. To generate multi-row CSV - # string, See EXAMPLE below. + + + # Convert a line from cells data to string. Consider using CSV.generate_line + # instead. To generate multi-row CSV string, see EXAMPLE below. # # EXAMPLE # def d(str) @@ -885,7 +278,20 @@ public # puts "Created #{ parsed_cells } cells." # end # p buf - # + # + # ARGS + # src: an Array of CSV::Cell to be converted to CSV string. Must respond to + # 'size' and '[](idx)'. src[idx] must return CSV::Cell. + # cells: num of cells in a line. + # out_dev: buffer for generated CSV string. Must respond to '<<(string)'. + # col_sep: Column separator. ?, by default. If you want to separate + # fields with semicolon, give ?; here. + # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you + # want to separate records with \r, give ?\r here. + # + # RETURNS + # parsed_cells: num of converted cells. + # def CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil) src_size = src.size if (src_size == 0) @@ -908,15 +314,47 @@ public else generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep) end - parsed_cells - end - -private - class IllegalFormatError < RuntimeError; end - - # Private class methods. - class << self - private + parsed_cells + end + + class << self + private + + def open_reader(filename, col_sep, row_sep, &block) + file = File.open(filename, 'rb') + if block + begin + CSV::Reader.parse(file, col_sep, row_sep) do |row| + yield(row) + end + ensure + file.close + end + nil + else + reader = CSV::Reader.create(file, col_sep, row_sep) + reader.close_on_terminate + reader + end + end + + def open_writer(filename, col_sep, row_sep, &block) + file = File.open(filename, 'wb') + if block + begin + CSV::Writer.generate(file, col_sep, row_sep) do |writer| + yield(writer) + end + ensure + file.close + end + nil + else + writer = CSV::Writer.create(file, col_sep, row_sep) + writer.close_on_terminate + writer + end + end def parse_body(src, idx, cell, col_sep, row_sep) row_sep_end = row_sep || ?\n @@ -1031,7 +469,7 @@ private end return :DT_EOS, idx end - + def generate_body(cells, out_dev, col_sep, row_sep) row_data = cells.data.dup if (!cells.is_null) @@ -1046,7 +484,7 @@ private end end end - + def generate_separator(type, out_dev, col_sep, row_sep) case type when :DT_COLSEP @@ -1058,8 +496,255 @@ private end - # DESCRIPTION - # CSV::StreamBuf -- a class for a bufferd stream. + # CSV formatted string/stream reader. + # + # EXAMPLE + # read CSV lines untill the first column is 'stop'. + # + # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row| + # p row + # break if !row[0].is_null && row[0].data == 'stop' + # end + # + class Reader + include Enumerable + + # Parse CSV data and get lines. Given block is called for each parsed row. + # Block value is always nil. Rows are not cached for performance reason. + def Reader.parse(str_or_readable, col_sep = ?,, row_sep = nil) + reader = create(str_or_readable, col_sep, row_sep) + reader.each do |row| + yield(row) + end + reader.close + nil + end + + # Returns reader instance. + def Reader.create(str_or_readable, col_sep = ?,, row_sep = nil) + case str_or_readable + when IO + IOReader.new(str_or_readable, col_sep, row_sep) + when String + StringReader.new(str_or_readable, col_sep, row_sep) + else + IOReader.new(str_or_readable, col_sep, row_sep) + end + end + + def each + while true + row = Row.new + parsed_cells = get_row(row) + if parsed_cells == 0 + break + end + yield(row) + end + nil + end + + def shift + row = Row.new + parsed_cells = get_row(row) + row + end + + def close + terminate + end + + private + + def initialize(dev) + raise RuntimeError.new('do not instanciate this class directly') + end + + def get_row(row) + raise NotImplementedError.new( + 'method get_row must be defined in a derived class') + end + + def terminate + # Define if needed. + end + end + + + class StringReader < Reader + + def initialize(string, col_sep = ?,, row_sep = nil) + @col_sep = col_sep + @row_sep = row_sep + @dev = string + @idx = 0 + if @dev[0, 3] == "\xef\xbb\xbf" + @idx += 3 + end + end + + private + + def get_row(row) + parsed_cells, next_idx = + CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep) + if parsed_cells == 0 && next_idx == 0 && @idx != @dev.size + raise IllegalFormatError.new + end + @idx = next_idx + parsed_cells + end + end + + + class IOReader < Reader + + def initialize(io, col_sep = ?,, row_sep = nil) + @io = io + @io.binmode if @io.respond_to?(:binmode) + @col_sep = col_sep + @row_sep = row_sep + @dev = CSV::IOBuf.new(@io) + @idx = 0 + if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf + @idx += 3 + end + @close_on_terminate = false + end + + # Tell this reader to close the IO when terminated (Triggered by invoking + # CSV::IOReader#close). + def close_on_terminate + @close_on_terminate = true + end + + private + + def get_row(row) + parsed_cells, next_idx = + CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep) + if parsed_cells == 0 && next_idx == 0 && !@dev.is_eos? + raise IllegalFormatError.new + end + dropped = @dev.drop(next_idx) + @idx = next_idx - dropped + parsed_cells + end + + def terminate + if @close_on_terminate + @io.close + end + + if @dev + @dev.close + end + end + end + + + # CSV formatted string/stream writer. + # + # EXAMPLE + # Write rows to 'csvout' file. + # + # outfile = File.open('csvout', 'wb') + # CSV::Writer.generate(outfile) do |csv| + # csv << ['c1', nil, '', '"', "\r\n", 'c2'] + # # or + # csv.add_row [ + # CSV::Cell.new('c1', false), + # CSV::Cell.new('dummy', true), + # CSV::Cell.new('', false), + # CSV::Cell.new('"', false), + # CSV::Cell.new("\r\n", false) + # CSV::Cell.new('c2', false) + # ] + # ... + # ... + # end + # + # outfile.close + # + class Writer + + # Generate CSV. Given block is called with the writer instance. + def Writer.generate(str_or_writable, col_sep = ?,, row_sep = nil) + writer = Writer.create(str_or_writable, col_sep, row_sep) + yield(writer) + writer.close + nil + end + + # str_or_writable must handle '<<(string)'. + def Writer.create(str_or_writable, col_sep = ?,, row_sep = nil) + BasicWriter.new(str_or_writable, col_sep, row_sep) + end + + # dump CSV stream to the device. argument must be an Array of String. + def <<(ary) + row = ary.collect { |item| + if item.is_a?(Cell) + item + elsif (item.nil?) + Cell.new('', true) + else + Cell.new(item.to_s, false) + end + } + CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep) + self + end + + # dump CSV stream to the device. argument must be an Array of CSV::Cell. + def add_row(row) + CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep) + self + end + + def close + terminate + end + + private + + def initialize(dev) + raise RuntimeError.new('do not instanciate this class directly') + end + + def terminate + # Define if needed. + end + end + + + class BasicWriter < Writer + + def initialize(str_or_writable, col_sep = ?,, row_sep = nil) + @col_sep = col_sep + @row_sep = row_sep + @dev = str_or_writable + @dev.binmode if @dev.respond_to?(:binmode) + @close_on_terminate = false + end + + # Tell this writer to close the IO when terminated (Triggered by invoking + # CSV::BasicWriter#close). + def close_on_terminate + @close_on_terminate = true + end + + private + + def terminate + if @close_on_terminate + @dev.close + end + end + end + + + # Buffered stream. # # EXAMPLE 1 -- an IO. # class MyBuf < StreamBuf @@ -1101,14 +786,14 @@ private # # EXAMPLE 2 -- String. # This is a conceptual example. No pros with this. - # + # # class StrBuf < StreamBuf # def initialize(s) # @str = s # @idx = 0 # super() # end - # + # # def read(size) # str = @str[@idx, size] # @idx += str.size @@ -1117,26 +802,15 @@ private # end # class StreamBuf # pure virtual. (do not instanciate it directly) - public - - # SYNOPSIS - # char/str = CSV::StreamBuf#get(idx, n = nil) - # char/str = CSV::StreamBuf#[idx, n = nil] - # - # ARGS - # idx: index of a string to specify a start point of a string to get. - # Unlike String instance, idx < 0 returns nil. - # n: size of a string to get. - # - # RETURNS - # char: if n == nil. A char at idx. - # str: if n != nil. A partial string, from idx to (idx + size). At - # EOF, the string size could not equal to arg n. - # - # DESCRIPTION - # Get a char or a partial string from the stream. - # - def [](idx, n = nil) + + # get a char or a partial string from the stream. + # idx: index of a string to specify a start point of a string to get. + # unlike String instance, idx < 0 returns nil. + # n: size of a string to get. + # returns char at idx if n == nil. + # returns a partial string, from idx to (idx + n) if n != nil. at EOF, + # the string size could not equal to arg n. + def [](idx, n = nil) if idx < 0 return nil end @@ -1182,21 +856,11 @@ private end end alias get [] - - # SYNOPSIS - # size_dropped = CSV::StreamBuf#drop(n) - # - # ARGS - # n: drop size - # - # RETURNS - # size_dropped: droped size. At EOF, dropped size might not equals to arg n. - # 0 if n <= 0. - # - # DESCRIPTION - # Drop a string from the stream. Once you drop the head of the stream, - # access to the dropped part via [] or get returns nil. - # + + # drop a string from the stream. + # returns dropped size. at EOF, dropped size might not equals to arg n. + # Once you drop the head of the stream, access to the dropped part via [] + # or get returns nil. def drop(n) if is_eos? return 0 @@ -1224,27 +888,13 @@ private end size_dropped end - - # SYNOPSIS - # is_eos = CSV::StreamBuf#is_eos? - # - # RETURNS - # is_eos: true if end of the stream or false. - # - # DESCRIPTION - # Check EOF or not. - # + def is_eos? return idx_is_eos?(0) end - - # SYNOPSIS - # N/A - # - # DESCRIPTION - # Do not instanciate this class directly. Define your own class which - # derives this class and define 'read' instance method. - # + + # WARN: Do not instanciate this class directly. Define your own class + # which derives this class and define 'read' instance method. def initialize @buf_list = [] @cur_buf = @buf_tail_idx = -1 @@ -1253,22 +903,24 @@ private add_buf @cur_buf = @buf_tail_idx end - + protected + def terminate while (rel_buf); end end - + # protected method 'read' must be defined in derived classes. # CAUTION: Returning a string which size is not equal to 'size' means - # EnfOfStream. When it is not at EOS, you must block the callee, try to - # read and return the sized string. + # EnfOfStream. When it is not at EOS, you must block the callee, try to + # read and return the sized string. def read(size) # raise EOFError - raise NotImplementedError.new('Method read must be defined in a derived class.') + raise NotImplementedError.new( + 'method read must be defined in a derived class') end - + private - + def buf_size(idx) @buf_list[idx].size end @@ -1296,7 +948,7 @@ private true end end - + def rel_buf if (@cur_buf < 0) return false @@ -1310,16 +962,16 @@ private return true end end - + def idx_is_eos?(idx) (@is_eos && ((@cur_buf < 0) || (@cur_buf == @buf_tail_idx))) end - + BufSize = 1024 * 8 end - # DESCRIPTION - # CSV::IOBuf -- a class for a bufferd IO. + + # Buffered IO. # # EXAMPLE # # File 'bigdata' could be a giga-byte size one! @@ -1330,21 +982,21 @@ private # end # class IOBuf < StreamBuf - public def initialize(s) @s = s super() end - + def close terminate end private + def read(size) @s.read(size) end - + def terminate super() end diff --git a/test/yaml/test_yaml.rb b/test/yaml/test_yaml.rb index 16d903b076..2000f99b60 100644 --- a/test/yaml/test_yaml.rb +++ b/test/yaml/test_yaml.rb @@ -1208,6 +1208,8 @@ EOY # # From Nobu Nakada [ruby-core:02311] # + t = 0..1 + assert_equal( t, YAML.load( YAML.dump( t ) ) ) t = "0".."1" assert_equal( t, YAML.load( YAML.dump( t ) ) ) t = ".."..."..." -- cgit v1.2.3