summaryrefslogtreecommitdiff
path: root/lib/csv.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/csv.rb')
-rw-r--r-- lib/csv.rb 124
1 files changed, 69 insertions, 55 deletions
diff --git a/lib/csv.rb b/lib/csv.rb
index 947eacbcfa..ee686db0cf 100644
--- a/lib/csv.rb
+++ b/lib/csv.rb
@@ -189,32 +189,32 @@ public
# writer << [nil, nil]
# end
#
- def CSV.open(filename, mode, col_sep = ?,, &block)
+ def CSV.open(filename, mode, col_sep = ?,, row_sep = nil, &block)
if mode == 'r' or mode == 'rb'
- open_reader(filename, col_sep, &block)
+ open_reader(filename, col_sep, row_sep, &block)
elsif mode == 'w' or mode == 'wb'
- open_writer(filename, col_sep, &block)
+ open_writer(filename, col_sep, row_sep, &block)
else
raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
end
end
- def CSV.parse(filename, col_sep = ?,, &block)
- open_reader(filename, col_sep, &block)
+ def CSV.parse(filename, col_sep = ?,, row_sep = nil, &block)
+ open_reader(filename, col_sep, row_sep, &block)
end
- def CSV.generate(filename, col_sep = ?,, &block)
- open_writer(filename, col_sep, &block)
+ def CSV.generate(filename, col_sep = ?,, row_sep = nil, &block)
+ open_writer(filename, col_sep, row_sep, &block)
end
# Private class methods.
class << self
private
- def open_reader(filename, col_sep, &block)
+ def open_reader(filename, col_sep, row_sep, &block)
file = File.open(filename, 'rb')
if block
begin
- CSV::Reader.parse(file, col_sep) do |row|
+ CSV::Reader.parse(file, col_sep, row_sep) do |row|
yield(row)
end
ensure
@@ -222,17 +222,17 @@ public
end
nil
else
- reader = CSV::Reader.create(file, col_sep)
+ reader = CSV::Reader.create(file, col_sep, row_sep)
reader.close_on_terminate
reader
end
end
- def open_writer(filename, col_sep, &block)
+ def open_writer(filename, col_sep, row_sep, &block)
file = File.open(filename, 'wb')
if block
begin
- CSV::Writer.generate(file, col_sep) do |writer|
+ CSV::Writer.generate(file, col_sep, row_sep) do |writer|
yield(writer)
end
ensure
@@ -240,7 +240,7 @@ public
end
nil
else
- writer = CSV::Writer.create(file, col_sep)
+ writer = CSV::Writer.create(file, col_sep, row_sep)
writer.close_on_terminate
writer
end
@@ -275,14 +275,14 @@ public
# DESCRIPTION
# Create instance. To get parse result, see CSV::Reader#each.
#
- def Reader.create(str_or_readable, col_sep = ?,)
+ def Reader.create(str_or_readable, col_sep = ?,, row_sep = nil)
case str_or_readable
when IO
- IOReader.new(str_or_readable, col_sep)
+ IOReader.new(str_or_readable, col_sep, row_sep)
when String
- StringReader.new(str_or_readable, col_sep)
+ StringReader.new(str_or_readable, col_sep, row_sep)
else
- IOReader.new(str_or_readable, col_sep)
+ IOReader.new(str_or_readable, col_sep, row_sep)
end
end
@@ -305,8 +305,8 @@ public
# Block value is always nil. Rows are not cached, for performance
# reasons.
#
- def Reader.parse(str_or_readable, col_sep = ?,)
- reader = create(str_or_readable, col_sep)
+ def Reader.parse(str_or_readable, col_sep = ?,, row_sep = nil)
+ reader = create(str_or_readable, col_sep, row_sep)
reader.each do |row|
yield(row)
end
@@ -413,8 +413,9 @@ public
# DESCRIPTION
# Create instance. To get parse result, see CSV::Reader#each.
#
- def initialize(string, col_sep = ?,)
+ def initialize(string, col_sep = ?,, row_sep = nil)
@col_sep = col_sep
+ @row_sep = row_sep
@dev = string
@idx = 0
if @dev[0, 3] == "\xef\xbb\xbf"
@@ -424,7 +425,7 @@ public
private
def get_row(row)
- parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep)
+ parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
if parsed_cells == 0 && next_idx == 0 && @idx != @dev.size
raise IllegalFormatError.new
end
@@ -460,9 +461,10 @@ public
# DESCRIPTION
# Create instance. To get parse result, see CSV::Reader#each.
#
- def initialize(io, col_sep = ?,)
+ def initialize(io, col_sep = ?,, row_sep = nil)
@io = io
@col_sep = col_sep
+ @row_sep = row_sep
@dev = CSV::IOBuf.new(@io)
@idx = 0
if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf
@@ -487,7 +489,7 @@ public
private
def get_row(row)
- parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep)
+ parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep)
if parsed_cells == 0 && next_idx == 0 && !@dev.is_eos?
raise IllegalFormatError.new
end
@@ -549,8 +551,8 @@ public
# Create instance. To add CSV data to generate CSV string, see
# CSV::Writer#<< or CSV::Writer#add_row.
#
- def Writer.create(str_or_readable, col_sep = ?,)
- BasicWriter.new(str_or_readable, col_sep)
+ def Writer.create(str_or_readable, col_sep = ?,, row_sep = nil)
+ BasicWriter.new(str_or_readable, col_sep, row_sep)
end
# SYNOPSIS
@@ -572,8 +574,8 @@ public
# To add CSV data to generate CSV string, see CSV::Writer#<< or
# CSV::Writer#add_row.
#
- def Writer.generate(str_or_writable, col_sep = ?,)
- writer = Writer.create(str_or_writable, col_sep)
+ def Writer.generate(str_or_writable, col_sep = ?,, row_sep = nil)
+ writer = Writer.create(str_or_writable, col_sep, row_sep)
yield(writer)
writer.close
nil
@@ -602,7 +604,7 @@ public
Cell.new(item.to_s, false)
end
}
- CSV.generate_row(row, row.size, @dev, @col_sep)
+ CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
self
end
@@ -621,7 +623,7 @@ public
# (Former is 'c1' and latter is Null.)
#
def add_row(row)
- CSV.generate_row(row, row.size, @dev, @col_sep)
+ CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep)
self
end
@@ -669,8 +671,9 @@ public
# Create instance. To add CSV data to generate CSV string, see
# CSV::Writer#<< or CSV::Writer#add_row.
#
- def initialize(str_or_writable, col_sep = ?,)
+ def initialize(str_or_writable, col_sep = ?,, row_sep = nil)
@col_sep = col_sep
+ @row_sep = row_sep
@dev = str_or_writable
@close_on_terminate = false
end
@@ -698,12 +701,14 @@ public
end
# SYNOPSIS
- # cells = CSV.parse_line(src, col_sep = ?,)
+ # cells = CSV.parse_line(src, col_sep = ?,, row_sep = nil)
#
# ARGS
# src: a CSV String.
# col_sep: Column separator. ?, by default. If you want to separate
# fields with semicolon, give ?; here.
+ # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
+ # want to separate records with \r, give ?\r here.
#
# RETURNS
# cells: an Array of parsed cells in first line. Each cell is a String.
@@ -716,14 +721,14 @@ public
# If you don't know whether a target string to parse is exactly 1 line or
# not, use CSV.parse_row instead of this method.
#
- def CSV.parse_line(src, col_sep = ?,)
+ def CSV.parse_line(src, col_sep = ?,, row_sep = nil)
idx = 0
res_type = :DT_COLSEP
cells = Row.new
begin
while (res_type.equal?(:DT_COLSEP))
cell = Cell.new
- res_type, idx = parse_body(src, idx, cell, col_sep)
+ res_type, idx = parse_body(src, idx, cell, col_sep, row_sep)
cells.push(cell.is_null ? nil : cell.data)
end
rescue IllegalFormatError
@@ -734,13 +739,15 @@ public
# SYNOPSIS
- # str = CSV.generate_line(cells, col_sep = ?,)
+ # str = CSV.generate_line(cells, col_sep = ?,, row_sep = nil)
#
# ARGS
# cells: an Array of cell to be converted to CSV string. Each cell must
# respond to 'to_s'.
# col_sep: Column separator. ?, by default. If you want to separate
# fields with semicolon, give ?; here.
+ # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
+ # want to separate records with \r, give ?\r here.
#
# RETURNS
# str: a String of generated CSV string.
@@ -748,7 +755,7 @@ public
# DESCRIPTION
# Create a line from cells. Each cell is stringified by to_s.
#
- def CSV.generate_line(cells, col_sep = ?,)
+ def CSV.generate_line(cells, col_sep = ?,, row_sep = nil)
if (cells.size == 0)
return ''
end
@@ -761,18 +768,18 @@ public
else
Cell.new(cells[idx].to_s, false)
end
- generate_body(cell, result_str, col_sep)
+ generate_body(cell, result_str, col_sep, row_sep)
idx += 1
if (idx == cells.size)
break
end
- generate_separator(:DT_COLSEP, result_str, col_sep)
+ generate_separator(:DT_COLSEP, result_str, col_sep, row_sep)
end
result_str
end
# SYNOPSIS
- # parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,)
+ # parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
#
# ARGS
# src: CSV data to be parsed. Must respond to '[](idx)'.
@@ -783,6 +790,8 @@ public
# out_dev: buffer for parsed cells. Must respond '<<(CSV::Cell)'.
# col_sep: Column separator. ?, by default. If you want to separate
# fields with semicolon, give ?; here.
+ # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you
+ # want to separate records with \r, give ?\r here.
#
# RETURNS
# parsed_cells: num of parsed cells.
@@ -802,14 +811,14 @@ public
# p parsed
# end while parsed_cells > 0
#
- def CSV.parse_row(src, idx, out_dev, col_sep = ?,)
+ def CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil)
idx_backup = idx
parsed_cells = 0
res_type = :DT_COLSEP
begin
while (!res_type.equal?(:DT_ROWSEP))
cell = Cell.new
- res_type, idx = parse_body(src, idx, cell, col_sep)
+ res_type, idx = parse_body(src, idx, cell, col_sep, row_sep)
if res_type.equal?(:DT_EOS)
if idx == idx_backup #((parsed_cells == 0) && (cell.is_null))
return 0, 0
@@ -826,7 +835,7 @@ public
end
# SYNOPSIS
- # parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,)
+ # parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
#
# ARGS
# src: an Array of CSV::Cell to be converted to CSV string. Must respond to
@@ -835,6 +844,8 @@ public
# out_dev: buffer for generated CSV string. Must respond to '<<(string)'.
# col_sep: Column separator. ?, by default. If you want to separate
# fields with semicolon, give ?; here.
+ # row_sep: Row separator. nil by default. nil means "\r\n" is written. If you
+ # want to terminate records with \r, give ?\r here.
#
# RETURNS
# parsed_cells: num of converted cells.
@@ -859,27 +870,27 @@ public
# end
# p buf
#
- def CSV.generate_row(src, cells, out_dev, col_sep = ?,)
+ def CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil)
src_size = src.size
if (src_size == 0)
if cells == 0
- generate_separator(:DT_ROWSEP, out_dev, col_sep)
+ generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep)
end
return 0
end
res_type = :DT_COLSEP
parsed_cells = 0
- generate_body(src[parsed_cells], out_dev, col_sep)
+ generate_body(src[parsed_cells], out_dev, col_sep, row_sep)
parsed_cells += 1
while ((parsed_cells < cells) && (parsed_cells != src_size))
- generate_separator(:DT_COLSEP, out_dev, col_sep)
- generate_body(src[parsed_cells], out_dev, col_sep)
+ generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep)
+ generate_body(src[parsed_cells], out_dev, col_sep, row_sep)
parsed_cells += 1
end
if (parsed_cells == cells)
- generate_separator(:DT_ROWSEP, out_dev, col_sep)
+ generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep)
else
- generate_separator(:DT_COLSEP, out_dev, col_sep)
+ generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep)
end
parsed_cells
end
@@ -891,7 +902,8 @@ private
class << self
private
- def parse_body(src, idx, cell, col_sep)
+ def parse_body(src, idx, cell, col_sep, row_sep)
+ row_sep_end = row_sep || ?\n
cell.is_null = false
state = :ST_START
quoted = false
@@ -941,7 +953,7 @@ private
quoted = true
state = :ST_DATA
end
- elsif (c == ?\r)
+ elsif row_sep.nil? and c == ?\r
if cr
raise IllegalFormatError.new
end
@@ -951,7 +963,7 @@ private
else
cr = true
end
- elsif (c == ?\n)
+ elsif c == row_sep_end
if state.equal?(:ST_DATA)
if cr
state = :ST_END
@@ -1004,12 +1016,14 @@ private
return :DT_EOS, idx
end
- def generate_body(cells, out_dev, col_sep)
+ def generate_body(cells, out_dev, col_sep, row_sep)
row_data = cells.data.dup
if (!cells.is_null)
if (row_data.gsub!('"', '""') ||
row_data.include?(col_sep) ||
- (/[\r\n]/ =~ row_data) || (cells.data.empty?))
+ (row_sep && row_data.index(row_sep)) ||
+ (/[\r\n]/ =~ row_data) ||
+ (cells.data.empty?))
out_dev << '"' << row_data << '"'
else
out_dev << row_data
@@ -1017,12 +1031,12 @@ private
end
end
- def generate_separator(type, out_dev, col_sep)
+ def generate_separator(type, out_dev, col_sep, row_sep)
case type
when :DT_COLSEP
out_dev << col_sep.chr
when :DT_ROWSEP
- out_dev << "\r\n"
+ out_dev << (row_sep || "\r\n")
end
end
end