diff options
Diffstat (limited to 'ruby_1_8_6/lib/csv.rb')
-rw-r--r-- | ruby_1_8_6/lib/csv.rb | 992 |
1 files changed, 992 insertions, 0 deletions
diff --git a/ruby_1_8_6/lib/csv.rb b/ruby_1_8_6/lib/csv.rb new file mode 100644 index 0000000000..f6c12fa285 --- /dev/null +++ b/ruby_1_8_6/lib/csv.rb @@ -0,0 +1,992 @@ +# CSV -- module for generating/parsing CSV data. +# Copyright (C) 2000-2004 NAKAMURA, Hiroshi <nakahiro@sarion.co.jp>. + +# $Id$ + +# This program is copyrighted free software by NAKAMURA, Hiroshi. You can +# redistribute it and/or modify it under the same terms of Ruby's license; +# either the dual license version in 2003, or any later version. + + +class CSV + class IllegalFormatError < RuntimeError; end + + # deprecated + class Cell < String + def initialize(data = "", is_null = false) + super(is_null ? "" : data) + end + + def data + to_s + end + end + + # deprecated + class Row < Array + end + + # Open a CSV formatted file for reading or writing. + # + # For reading. + # + # EXAMPLE 1 + # CSV.open('csvfile.csv', 'r') do |row| + # p row + # end + # + # EXAMPLE 2 + # reader = CSV.open('csvfile.csv', 'r') + # row1 = reader.shift + # row2 = reader.shift + # if row2.empty? + # p 'row2 not find.' + # end + # reader.close + # + # ARGS + # filename: filename to parse. + # col_sep: Column separator. ?, by default. If you want to separate + # fields with semicolon, give ?; here. + # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you + # want to separate records with \r, give ?\r here. + # + # RETURNS + # reader instance. To get parse result, see CSV::Reader#each. + # + # + # For writing. + # + # EXAMPLE 1 + # CSV.open('csvfile.csv', 'w') do |writer| + # writer << ['r1c1', 'r1c2'] + # writer << ['r2c1', 'r2c2'] + # writer << [nil, nil] + # end + # + # EXAMPLE 2 + # writer = CSV.open('csvfile.csv', 'w') + # writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil] + # writer.close + # + # ARGS + # filename: filename to generate. + # col_sep: Column separator. ?, by default. If you want to separate + # fields with semicolon, give ?; here. + # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you + # want to separate records with \r, give ?\r here. + # + # RETURNS + # writer instance. See CSV::Writer#<< and CSV::Writer#add_row to know how + # to generate CSV string. + # + def CSV.open(path, mode, fs = nil, rs = nil, &block) + if mode == 'r' or mode == 'rb' + open_reader(path, mode, fs, rs, &block) + elsif mode == 'w' or mode == 'wb' + open_writer(path, mode, fs, rs, &block) + else + raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'") + end + end + + def CSV.foreach(path, rs = nil, &block) + open_reader(path, 'r', ',', rs, &block) + end + + def CSV.read(path, length = nil, offset = nil) + CSV.parse(IO.read(path, length, offset)) + end + + def CSV.readlines(path, rs = nil) + reader = open_reader(path, 'r', ',', rs) + begin + reader.collect { |row| row } + ensure + reader.close + end + end + + def CSV.generate(path, fs = nil, rs = nil, &block) + open_writer(path, 'w', fs, rs, &block) + end + + # Parse lines from given string or stream. Return rows as an Array of Arrays. + def CSV.parse(str_or_readable, fs = nil, rs = nil, &block) + if File.exist?(str_or_readable) + STDERR.puts("CSV.parse(filename) is deprecated." + + " Use CSV.open(filename, 'r') instead.") + return open_reader(str_or_readable, 'r', fs, rs, &block) + end + if block + CSV::Reader.parse(str_or_readable, fs, rs) do |row| + yield(row) + end + nil + else + CSV::Reader.create(str_or_readable, fs, rs).collect { |row| row } + end + end + + # Parse a line from given string. Bear in mind it parses ONE LINE. Rest of + # the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the + # second line 'c,d' is ignored. + # + # If you don't know whether a target string to parse is exactly 1 line or + # not, use CSV.parse_row instead of this method. + def CSV.parse_line(src, fs = nil, rs = nil) + fs ||= ',' + if fs.is_a?(Fixnum) + fs = fs.chr + end + if !rs.nil? and rs.is_a?(Fixnum) + rs = rs.chr + end + idx = 0 + res_type = :DT_COLSEP + row = [] + begin + while res_type == :DT_COLSEP + res_type, idx, cell = parse_body(src, idx, fs, rs) + row << cell + end + rescue IllegalFormatError + return [] + end + row + end + + # Create a line from cells. each cell is stringified by to_s. + def CSV.generate_line(row, fs = nil, rs = nil) + if row.size == 0 + return '' + end + fs ||= ',' + if fs.is_a?(Fixnum) + fs = fs.chr + end + if !rs.nil? and rs.is_a?(Fixnum) + rs = rs.chr + end + res_type = :DT_COLSEP + result_str = '' + idx = 0 + while true + generate_body(row[idx], result_str, fs, rs) + idx += 1 + if (idx == row.size) + break + end + generate_separator(:DT_COLSEP, result_str, fs, rs) + end + result_str + end + + # Parse a line from string. Consider using CSV.parse_line instead. + # To parse lines in CSV string, see EXAMPLE below. + # + # EXAMPLE + # src = "a,b\r\nc,d\r\ne,f" + # idx = 0 + # begin + # parsed = [] + # parsed_cells, idx = CSV.parse_row(src, idx, parsed) + # puts "Parsed #{ parsed_cells } cells." + # p parsed + # end while parsed_cells > 0 + # + # ARGS + # src: a CSV data to be parsed. Must respond '[](idx)'. + # src[](idx) must return a char. (Not a string such as 'a', but 97). + # src[](idx_out_of_bounds) must return nil. A String satisfies this + # requirement. + # idx: index of parsing location of 'src'. 0 origin. + # out_dev: buffer for parsed cells. Must respond '<<(aString)'. + # col_sep: Column separator. ?, by default. If you want to separate + # fields with semicolon, give ?; here. + # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you + # want to separate records with \r, give ?\r here. + # + # RETURNS + # parsed_cells: num of parsed cells. + # idx: index of next parsing location of 'src'. + # + def CSV.parse_row(src, idx, out_dev, fs = nil, rs = nil) + fs ||= ',' + if fs.is_a?(Fixnum) + fs = fs.chr + end + if !rs.nil? and rs.is_a?(Fixnum) + rs = rs.chr + end + idx_backup = idx + parsed_cells = 0 + res_type = :DT_COLSEP + begin + while res_type != :DT_ROWSEP + res_type, idx, cell = parse_body(src, idx, fs, rs) + if res_type == :DT_EOS + if idx == idx_backup #((parsed_cells == 0) and cell.nil?) + return 0, 0 + end + res_type = :DT_ROWSEP + end + parsed_cells += 1 + out_dev << cell + end + rescue IllegalFormatError + return 0, 0 + end + return parsed_cells, idx + end + + # Convert a line from cells data to string. Consider using CSV.generate_line + # instead. To generate multi-row CSV string, see EXAMPLE below. + # + # EXAMPLE + # row1 = ['a', 'b'] + # row2 = ['c', 'd'] + # row3 = ['e', 'f'] + # src = [row1, row2, row3] + # buf = '' + # src.each do |row| + # parsed_cells = CSV.generate_row(row, 2, buf) + # puts "Created #{ parsed_cells } cells." + # end + # p buf + # + # ARGS + # src: an Array of String to be converted to CSV string. Must respond to + # 'size' and '[](idx)'. src[idx] must return String. + # cells: num of cells in a line. + # out_dev: buffer for generated CSV string. Must respond to '<<(string)'. + # col_sep: Column separator. ?, by default. If you want to separate + # fields with semicolon, give ?; here. + # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you + # want to separate records with \r, give ?\r here. + # + # RETURNS + # parsed_cells: num of converted cells. + # + def CSV.generate_row(src, cells, out_dev, fs = nil, rs = nil) + fs ||= ',' + if fs.is_a?(Fixnum) + fs = fs.chr + end + if !rs.nil? and rs.is_a?(Fixnum) + rs = rs.chr + end + src_size = src.size + if (src_size == 0) + if cells == 0 + generate_separator(:DT_ROWSEP, out_dev, fs, rs) + end + return 0 + end + res_type = :DT_COLSEP + parsed_cells = 0 + generate_body(src[parsed_cells], out_dev, fs, rs) + parsed_cells += 1 + while ((parsed_cells < cells) and (parsed_cells != src_size)) + generate_separator(:DT_COLSEP, out_dev, fs, rs) + generate_body(src[parsed_cells], out_dev, fs, rs) + parsed_cells += 1 + end + if (parsed_cells == cells) + generate_separator(:DT_ROWSEP, out_dev, fs, rs) + else + generate_separator(:DT_COLSEP, out_dev, fs, rs) + end + parsed_cells + end + + # Private class methods. + class << self + private + + def open_reader(path, mode, fs, rs, &block) + file = File.open(path, mode) + if block + begin + CSV::Reader.parse(file, fs, rs) do |row| + yield(row) + end + ensure + file.close + end + nil + else + reader = CSV::Reader.create(file, fs, rs) + reader.close_on_terminate + reader + end + end + + def open_writer(path, mode, fs, rs, &block) + file = File.open(path, mode) + if block + begin + CSV::Writer.generate(file, fs, rs) do |writer| + yield(writer) + end + ensure + file.close + end + nil + else + writer = CSV::Writer.create(file, fs, rs) + writer.close_on_terminate + writer + end + end + + def parse_body(src, idx, fs, rs) + fs_str = fs + fs_size = fs_str.size + rs_str = rs || "\n" + rs_size = rs_str.size + fs_idx = rs_idx = 0 + cell = Cell.new + state = :ST_START + quoted = cr = false + c = nil + last_idx = idx + while c = src[idx] + unless quoted + fschar = (c == fs_str[fs_idx]) + rschar = (c == rs_str[rs_idx]) + # simple 1 char backtrack + if !fschar and c == fs_str[0] + fs_idx = 0 + fschar = true + if state == :ST_START + state = :ST_DATA + elsif state == :ST_QUOTE + raise IllegalFormatError + end + end + if !rschar and c == rs_str[0] + rs_idx = 0 + rschar = true + if state == :ST_START + state = :ST_DATA + elsif state == :ST_QUOTE + raise IllegalFormatError + end + end + end + if c == ?" + fs_idx = rs_idx = 0 + if cr + raise IllegalFormatError + end + cell << src[last_idx, (idx - last_idx)] + last_idx = idx + if state == :ST_DATA + if quoted + last_idx += 1 + quoted = false + state = :ST_QUOTE + else + raise IllegalFormatError + end + elsif state == :ST_QUOTE + cell << c.chr + last_idx += 1 + quoted = true + state = :ST_DATA + else # :ST_START + quoted = true + last_idx += 1 + state = :ST_DATA + end + elsif fschar or rschar + if fschar + fs_idx += 1 + end + if rschar + rs_idx += 1 + end + sep = nil + if fs_idx == fs_size + if state == :ST_START and rs_idx > 0 and fs_idx < rs_idx + state = :ST_DATA + end + cell << src[last_idx, (idx - last_idx - (fs_size - 1))] + last_idx = idx + fs_idx = rs_idx = 0 + if cr + raise IllegalFormatError + end + sep = :DT_COLSEP + elsif rs_idx == rs_size + if state == :ST_START and fs_idx > 0 and rs_idx < fs_idx + state = :ST_DATA + end + if !(rs.nil? and cr) + cell << src[last_idx, (idx - last_idx - (rs_size - 1))] + last_idx = idx + end + fs_idx = rs_idx = 0 + sep = :DT_ROWSEP + end + if sep + if state == :ST_DATA + return sep, idx + 1, cell; + elsif state == :ST_QUOTE + return sep, idx + 1, cell; + else # :ST_START + return sep, idx + 1, nil + end + end + elsif rs.nil? and c == ?\r + # special \r treatment for backward compatibility + fs_idx = rs_idx = 0 + if cr + raise IllegalFormatError + end + cell << src[last_idx, (idx - last_idx)] + last_idx = idx + if quoted + state = :ST_DATA + else + cr = true + end + else + fs_idx = rs_idx = 0 + if state == :ST_DATA or state == :ST_START + if cr + raise IllegalFormatError + end + state = :ST_DATA + else # :ST_QUOTE + raise IllegalFormatError + end + end + idx += 1 + end + if state == :ST_START + if fs_idx > 0 or rs_idx > 0 + state = :ST_DATA + else + return :DT_EOS, idx, nil + end + elsif quoted + raise IllegalFormatError + elsif cr + raise IllegalFormatError + end + cell << src[last_idx, (idx - last_idx)] + last_idx = idx + return :DT_EOS, idx, cell + end + + def generate_body(cell, out_dev, fs, rs) + if cell.nil? + # empty + else + cell = cell.to_s + row_data = cell.dup + if (row_data.gsub!('"', '""') or + row_data.index(fs) or + (rs and row_data.index(rs)) or + (/[\r\n]/ =~ row_data) or + (cell.empty?)) + out_dev << '"' << row_data << '"' + else + out_dev << row_data + end + end + end + + def generate_separator(type, out_dev, fs, rs) + case type + when :DT_COLSEP + out_dev << fs + when :DT_ROWSEP + out_dev << (rs || "\n") + end + end + end + + + # CSV formatted string/stream reader. + # + # EXAMPLE + # read CSV lines untill the first column is 'stop'. + # + # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row| + # p row + # break if !row[0].is_null && row[0].data == 'stop' + # end + # + class Reader + include Enumerable + + # Parse CSV data and get lines. Given block is called for each parsed row. + # Block value is always nil. Rows are not cached for performance reason. + def Reader.parse(str_or_readable, fs = ',', rs = nil, &block) + reader = Reader.create(str_or_readable, fs, rs) + if block + reader.each do |row| + yield(row) + end + reader.close + nil + else + reader + end + end + + # Returns reader instance. + def Reader.create(str_or_readable, fs = ',', rs = nil) + case str_or_readable + when IO + IOReader.new(str_or_readable, fs, rs) + when String + StringReader.new(str_or_readable, fs, rs) + else + IOReader.new(str_or_readable, fs, rs) + end + end + + def each + while true + row = [] + parsed_cells = get_row(row) + if parsed_cells == 0 + break + end + yield(row) + end + nil + end + + def shift + row = [] + parsed_cells = get_row(row) + row + end + + def close + terminate + end + + private + + def initialize(dev) + raise RuntimeError.new('Do not instanciate this class directly.') + end + + def get_row(row) + raise NotImplementedError.new('Method get_row must be defined in a derived class.') + end + + def terminate + # Define if needed. + end + end + + + class StringReader < Reader + def initialize(string, fs = ',', rs = nil) + @fs = fs + @rs = rs + @dev = string + @idx = 0 + if @dev[0, 3] == "\xef\xbb\xbf" + @idx += 3 + end + end + + private + + def get_row(row) + parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs) + if parsed_cells == 0 and next_idx == 0 and @idx != @dev.size + raise IllegalFormatError.new + end + @idx = next_idx + parsed_cells + end + end + + + class IOReader < Reader + def initialize(io, fs = ',', rs = nil) + @io = io + @fs = fs + @rs = rs + @dev = CSV::IOBuf.new(@io) + @idx = 0 + if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf + @idx += 3 + end + @close_on_terminate = false + end + + # Tell this reader to close the IO when terminated (Triggered by invoking + # CSV::IOReader#close). + def close_on_terminate + @close_on_terminate = true + end + + private + + def get_row(row) + parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs) + if parsed_cells == 0 and next_idx == 0 and !@dev.is_eos? + raise IllegalFormatError.new + end + dropped = @dev.drop(next_idx) + @idx = next_idx - dropped + parsed_cells + end + + def terminate + if @close_on_terminate + @io.close + end + + if @dev + @dev.close + end + end + end + + + # CSV formatted string/stream writer. + # + # EXAMPLE + # Write rows to 'csvout' file. + # + # outfile = File.open('csvout', 'wb') + # CSV::Writer.generate(outfile) do |csv| + # csv << ['c1', nil, '', '"', "\r\n", 'c2'] + # ... + # end + # + # outfile.close + # + class Writer + # Given block is called with the writer instance. str_or_writable must + # handle '<<(string)'. + def Writer.generate(str_or_writable, fs = ',', rs = nil, &block) + writer = Writer.create(str_or_writable, fs, rs) + if block + yield(writer) + writer.close + nil + else + writer + end + end + + # str_or_writable must handle '<<(string)'. + def Writer.create(str_or_writable, fs = ',', rs = nil) + BasicWriter.new(str_or_writable, fs, rs) + end + + # dump CSV stream to the device. argument must be an Array of String. + def <<(row) + CSV.generate_row(row, row.size, @dev, @fs, @rs) + self + end + alias add_row << + + def close + terminate + end + + private + + def initialize(dev) + raise RuntimeError.new('Do not instanciate this class directly.') + end + + def terminate + # Define if needed. + end + end + + + class BasicWriter < Writer + def initialize(str_or_writable, fs = ',', rs = nil) + @fs = fs + @rs = rs + @dev = str_or_writable + @close_on_terminate = false + end + + # Tell this writer to close the IO when terminated (Triggered by invoking + # CSV::BasicWriter#close). + def close_on_terminate + @close_on_terminate = true + end + + private + + def terminate + if @close_on_terminate + @dev.close + end + end + end + +private + + # Buffered stream. + # + # EXAMPLE 1 -- an IO. + # class MyBuf < StreamBuf + # # Do initialize myself before a super class. Super class might call my + # # method 'read'. (Could be awful for C++ user. :-) + # def initialize(s) + # @s = s + # super() + # end + # + # # define my own 'read' method. + # # CAUTION: Returning nil means EnfOfStream. + # def read(size) + # @s.read(size) + # end + # + # # release buffers. in Ruby which has GC, you do not have to call this... + # def terminate + # @s = nil + # super() + # end + # end + # + # buf = MyBuf.new(STDIN) + # my_str = '' + # p buf[0, 0] # => '' (null string) + # p buf[0] # => 97 (char code of 'a') + # p buf[0, 1] # => 'a' + # my_str = buf[0, 5] + # p my_str # => 'abcde' (5 chars) + # p buf[0, 6] # => "abcde\n" (6 chars) + # p buf[0, 7] # => "abcde\n" (6 chars) + # p buf.drop(3) # => 3 (dropped chars) + # p buf.get(0, 2) # => 'de' (2 chars) + # p buf.is_eos? # => false (is not EOS here) + # p buf.drop(5) # => 3 (dropped chars) + # p buf.is_eos? # => true (is EOS here) + # p buf[0] # => nil (is EOS here) + # + # EXAMPLE 2 -- String. + # This is a conceptual example. No pros with this. + # + # class StrBuf < StreamBuf + # def initialize(s) + # @str = s + # @idx = 0 + # super() + # end + # + # def read(size) + # str = @str[@idx, size] + # @idx += str.size + # str + # end + # end + # + class StreamBuf + # get a char or a partial string from the stream. + # idx: index of a string to specify a start point of a string to get. + # unlike String instance, idx < 0 returns nil. + # n: size of a string to get. + # returns char at idx if n == nil. + # returns a partial string, from idx to (idx + n) if n != nil. at EOF, + # the string size could not equal to arg n. + def [](idx, n = nil) + if idx < 0 + return nil + end + if (idx_is_eos?(idx)) + if n and (@offset + idx == buf_size(@cur_buf)) + # Like a String, 'abc'[4, 1] returns nil and + # 'abc'[3, 1] returns '' not nil. + return '' + else + return nil + end + end + my_buf = @cur_buf + my_offset = @offset + next_idx = idx + while (my_offset + next_idx >= buf_size(my_buf)) + if (my_buf == @buf_tail_idx) + unless add_buf + break + end + end + next_idx = my_offset + next_idx - buf_size(my_buf) + my_buf += 1 + my_offset = 0 + end + loc = my_offset + next_idx + if !n + return @buf_list[my_buf][loc] # Fixnum of char code. + elsif (loc + n - 1 < buf_size(my_buf)) + return @buf_list[my_buf][loc, n] # String. + else # should do loop insted of (tail) recursive call... + res = @buf_list[my_buf][loc, BufSize] + size_added = buf_size(my_buf) - loc + if size_added > 0 + idx += size_added + n -= size_added + ret = self[idx, n] + if ret + res << ret + end + end + return res + end + end + alias get [] + + # drop a string from the stream. + # returns dropped size. at EOF, dropped size might not equals to arg n. + # Once you drop the head of the stream, access to the dropped part via [] + # or get returns nil. + def drop(n) + if is_eos? + return 0 + end + size_dropped = 0 + while (n > 0) + if !@is_eos or (@cur_buf != @buf_tail_idx) + if (@offset + n < buf_size(@cur_buf)) + size_dropped += n + @offset += n + n = 0 + else + size = buf_size(@cur_buf) - @offset + size_dropped += size + n -= size + @offset = 0 + unless rel_buf + unless add_buf + break + end + @cur_buf = @buf_tail_idx + end + end + end + end + size_dropped + end + + def is_eos? + return idx_is_eos?(0) + end + + # WARN: Do not instantiate this class directly. Define your own class + # which derives this class and define 'read' instance method. + def initialize + @buf_list = [] + @cur_buf = @buf_tail_idx = -1 + @offset = 0 + @is_eos = false + add_buf + @cur_buf = @buf_tail_idx + end + + protected + + def terminate + while (rel_buf); end + end + + # protected method 'read' must be defined in derived classes. + # CAUTION: Returning a string which size is not equal to 'size' means + # EnfOfStream. When it is not at EOS, you must block the callee, try to + # read and return the sized string. + def read(size) # raise EOFError + raise NotImplementedError.new('Method read must be defined in a derived class.') + end + + private + + def buf_size(idx) + @buf_list[idx].size + end + + def add_buf + if @is_eos + return false + end + begin + str_read = read(BufSize) + rescue EOFError + str_read = nil + rescue + terminate + raise + end + if str_read.nil? + @is_eos = true + @buf_list.push('') + @buf_tail_idx += 1 + false + else + @buf_list.push(str_read) + @buf_tail_idx += 1 + true + end + end + + def rel_buf + if (@cur_buf < 0) + return false + end + @buf_list[@cur_buf] = nil + if (@cur_buf == @buf_tail_idx) + @cur_buf = -1 + return false + else + @cur_buf += 1 + return true + end + end + + def idx_is_eos?(idx) + (@is_eos and ((@cur_buf < 0) or (@cur_buf == @buf_tail_idx))) + end + + BufSize = 1024 * 8 + end + + # Buffered IO. + # + # EXAMPLE + # # File 'bigdata' could be a giga-byte size one! + # buf = CSV::IOBuf.new(File.open('bigdata', 'rb')) + # CSV::Reader.new(buf).each do |row| + # p row + # break if row[0].data == 'admin' + # end + # + class IOBuf < StreamBuf + def initialize(s) + @s = s + super() + end + + def close + terminate + end + + private + + def read(size) + @s.read(size) + end + + def terminate + super() + end + end +end |