summary refs log tree commit diff
diff options
context:
space:
mode:
author nahi <nahi@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2004-05-20 17:24:04 +0000
committer nahi <nahi@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2004-05-20 17:24:04 +0000
commit 406f506d59510743469db2421ecdf6460aace6af (patch)
tree 79a08f765d70d1779cfba95dc60ce3e22d02bb4e
parent fc04396ea31c74a85bb34ea4d2bbebd13e8f87f9 (diff)
* lib/csv.rb: fixed a few bugs around multi char record/field separator.
* test/csv/test_csv.rb: added boundary test for above feature.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@6377 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 6
-rw-r--r-- lib/csv.rb 172
-rw-r--r-- test/csv/test_csv.rb 153
-rw-r--r-- version.h 6
4 files changed, 251 insertions, 86 deletions
diff --git a/ChangeLog b/ChangeLog
index 8d1a2446b0..8546735817 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Fri May 21 02:21:11 2004 NAKAMURA, Hiroshi <nakahiro@sarion.co.jp>
+
+ * lib/csv.rb: fixed a few bugs around multi char record/field separator.
+
+ * test/csv/test_csv.rb: added boundary test for above feature.
+
Thu May 20 17:02:03 2004 Nobuyoshi Nakada <nobu@ruby-lang.org>
* lib/mkmf.rb (check_sizeof): define result size. [ruby-core:02911]
diff --git a/lib/csv.rb b/lib/csv.rb
index 351976fd00..26fc6435eb 100644
--- a/lib/csv.rb
+++ b/lib/csv.rb
@@ -11,7 +11,7 @@
class CSV
class IllegalFormatError < RuntimeError; end
- def CSV.open(path, mode, fs = ',', rs = nil, &block)
+ def CSV.open(path, mode, fs = nil, rs = nil, &block)
if mode == 'r' or mode == 'rb'
open_reader(path, mode, fs, rs, &block)
elsif mode == 'w' or mode == 'wb'
@@ -51,7 +51,7 @@ class CSV
# RETURNS
# reader instance. To get parse result, see CSV::Reader#each.
#
- def CSV.parse(path, fs = ',', rs = nil, &block)
+ def CSV.parse(path, fs = nil, rs = nil, &block)
open_reader(path, 'r', fs, rs, &block)
end
@@ -80,7 +80,7 @@ class CSV
# writer instance. See CSV::Writer#<< and CSV::Writer#add_row to know how
# to generate CSV string.
#
- def CSV.generate(path, fs = ',', rs = nil, &block)
+ def CSV.generate(path, fs = nil, rs = nil, &block)
open_writer(path, 'w', fs, rs, &block)
end
@@ -90,8 +90,9 @@ class CSV
#
# If you don't know whether a target string to parse is exactly 1 line or
# not, use CSV.parse_row instead of this method.
- def CSV.parse_line(src, fs = ',', rs = nil)
- if !fs.nil? and fs.is_a?(Fixnum)
+ def CSV.parse_line(src, fs = nil, rs = nil)
+ fs ||= ','
+ if fs.is_a?(Fixnum)
fs = fs.chr
end
if !rs.nil? and rs.is_a?(Fixnum)
@@ -101,7 +102,7 @@ class CSV
res_type = :DT_COLSEP
row = []
begin
- while (res_type.equal?(:DT_COLSEP))
+ while res_type == :DT_COLSEP
res_type, idx, cell = parse_body(src, idx, fs, rs)
row << cell
end
@@ -112,11 +113,12 @@ class CSV
end
# Create a line from cells. each cell is stringified by to_s.
- def CSV.generate_line(row, fs = ',', rs = nil)
- if (row.size == 0)
+ def CSV.generate_line(row, fs = nil, rs = nil)
+ if row.size == 0
return ''
end
- if !fs.nil? and fs.is_a?(Fixnum)
+ fs ||= ','
+ if fs.is_a?(Fixnum)
fs = fs.chr
end
if !rs.nil? and rs.is_a?(Fixnum)
@@ -165,8 +167,9 @@ class CSV
# parsed_cells: num of parsed cells.
# idx: index of next parsing location of 'src'.
#
- def CSV.parse_row(src, idx, out_dev, fs = ',', rs = nil)
- if !fs.nil? and fs.is_a?(Fixnum)
+ def CSV.parse_row(src, idx, out_dev, fs = nil, rs = nil)
+ fs ||= ','
+ if fs.is_a?(Fixnum)
fs = fs.chr
end
if !rs.nil? and rs.is_a?(Fixnum)
@@ -176,9 +179,9 @@ class CSV
parsed_cells = 0
res_type = :DT_COLSEP
begin
- while (!res_type.equal?(:DT_ROWSEP))
+ while res_type != :DT_ROWSEP
res_type, idx, cell = parse_body(src, idx, fs, rs)
- if res_type.equal?(:DT_EOS)
+ if res_type == :DT_EOS
if idx == idx_backup #((parsed_cells == 0) and cell.nil?)
return 0, 0
end
@@ -225,8 +228,9 @@ class CSV
# RETURNS
# parsed_cells: num of converted cells.
#
- def CSV.generate_row(src, cells, out_dev, fs = ',', rs = nil)
- if !fs.nil? and fs.is_a?(Fixnum)
+ def CSV.generate_row(src, cells, out_dev, fs = nil, rs = nil)
+ fs ||= ','
+ if fs.is_a?(Fixnum)
fs = fs.chr
end
if !rs.nil? and rs.is_a?(Fixnum)
@@ -299,30 +303,46 @@ class CSV
def parse_body(src, idx, fs, rs)
fs_str = fs
fs_size = fs_str.size
- fs_idx = 0
rs_str = rs || "\n"
rs_size = rs_str.size
- rs_idx = 0
+ fs_idx = rs_idx = 0
cell = ''
state = :ST_START
- quoted = false
- cr = false
+ quoted = cr = false
c = nil
last_idx = idx
- while (c = src[idx])
- if c == ?"
- cell << src[last_idx, (idx - last_idx)]
- last_idx = idx
- if cr
- raise IllegalFormatError
- end
- if fs_idx != 0
+ while c = src[idx]
+ unless quoted
+ fschar = (c == fs_str[fs_idx])
+ rschar = (c == rs_str[rs_idx])
+ # simple 1 char backtrack
+ if !fschar and c == fs_str[0]
fs_idx = 0
+ fschar = true
+ if state == :ST_START
+ state = :ST_DATA
+ elsif state == :ST_QUOTE
+ raise IllegalFormatError
+ end
end
- if rs_idx != 0
+ if !rschar and c == rs_str[0]
rs_idx = 0
+ rschar = true
+ if state == :ST_START
+ state = :ST_DATA
+ elsif state == :ST_QUOTE
+ raise IllegalFormatError
+ end
end
- if state.equal?(:ST_DATA)
+ end
+ if c == ?"
+ fs_idx = rs_idx = 0
+ if cr
+ raise IllegalFormatError
+ end
+ cell << src[last_idx, (idx - last_idx)]
+ last_idx = idx
+ if state == :ST_DATA
if quoted
last_idx += 1
quoted = false
@@ -330,7 +350,7 @@ class CSV
else
raise IllegalFormatError
end
- elsif state.equal?(:ST_QUOTE)
+ elsif state == :ST_QUOTE
cell << c.chr
last_idx += 1
quoted = true
@@ -340,62 +360,48 @@ class CSV
last_idx += 1
state = :ST_DATA
end
- elsif c == fs_str[fs_idx]
- fs_idx += 1
- cell << src[last_idx, (idx - last_idx)]
- last_idx = idx
- if rs_idx != 0
- rs_idx = 0
+ elsif fschar or rschar
+ if fschar
+ fs_idx += 1
+ end
+ if rschar
+ rs_idx += 1
end
+ sep = nil
if fs_idx == fs_size
- fs_idx = 0
+ if state == :ST_START and rs_idx > 0 and fs_idx < rs_idx
+ state = :ST_DATA
+ end
+ cell << src[last_idx, (idx - last_idx - (fs_size - 1))]
+ last_idx = idx
+ fs_idx = rs_idx = 0
if cr
raise IllegalFormatError
end
- if state.equal?(:ST_DATA)
- if rs_idx != 0
- cell << rs_str[0, rs_idx]
- rs_idx = 0
- end
- if quoted
- true # ToDo: delete; dummy line for coverage
- else
- return :DT_COLSEP, idx + 1, cell;
- end
- elsif state.equal?(:ST_QUOTE)
- if rs_idx != 0
- raise IllegalFormatError
- end
- return :DT_COLSEP, idx + 1, cell;
- else # :ST_START
- return :DT_COLSEP, idx + 1, nil
+ sep = :DT_COLSEP
+ elsif rs_idx == rs_size
+ if state == :ST_START and fs_idx > 0 and rs_idx < fs_idx
+ state = :ST_DATA
end
+ if !(rs.nil? and cr)
+ cell << src[last_idx, (idx - last_idx - (rs_size - 1))]
+ last_idx = idx
+ end
+ fs_idx = rs_idx = 0
+ sep = :DT_ROWSEP
end
- elsif c == rs_str[rs_idx]
- rs_idx += 1
- unless (rs.nil? and cr)
- cell << src[last_idx, (idx - last_idx)]
- last_idx = idx
- end
- if fs_idx != 0
- fs_idx = 0
- end
- if rs_idx == rs_size
- rs_idx = 0
- if state.equal?(:ST_DATA)
- if quoted
- true # ToDo: delete; dummy line for coverage
- else
- return :DT_ROWSEP, idx + 1, cell
- end
- elsif state.equal?(:ST_QUOTE)
- return :DT_ROWSEP, idx + 1, cell
+ if sep
+ if state == :ST_DATA
+ return sep, idx + 1, cell;
+ elsif state == :ST_QUOTE
+ return sep, idx + 1, cell;
else # :ST_START
- return :DT_ROWSEP, idx + 1, nil
+ return sep, idx + 1, nil
end
end
elsif rs.nil? and c == ?\r
# special \r treatment for backward compatibility
+ fs_idx = rs_idx = 0
if cr
raise IllegalFormatError
end
@@ -407,13 +413,8 @@ class CSV
cr = true
end
else
- if fs_idx != 0
- fs_idx = 0
- end
- if rs_idx != 0
- rs_idx = 0
- end
- if state.equal?(:ST_DATA) or state.equal?(:ST_START)
+ fs_idx = rs_idx = 0
+ if state == :ST_DATA or state == :ST_START
if cr
raise IllegalFormatError
end
@@ -424,8 +425,12 @@ class CSV
end
idx += 1
end
- if state.equal?(:ST_START)
- return :DT_EOS, idx, nil
+ if state == :ST_START
+ if fs_idx > 0 or rs_idx > 0
+ state = :ST_DATA
+ else
+ return :DT_EOS, idx, nil
+ end
elsif quoted
raise IllegalFormatError
elsif cr
@@ -440,6 +445,7 @@ class CSV
if cell.nil?
# empty
else
+ cell = cell.to_s
row_data = cell.dup
if (row_data.gsub!('"', '""') or
row_data.index(fs) or
diff --git a/test/csv/test_csv.rb b/test/csv/test_csv.rb
index 6dc101edf1..eca88321f0 100644
--- a/test/csv/test_csv.rb
+++ b/test/csv/test_csv.rb
@@ -639,6 +639,12 @@ public
buf = CSV.generate_line(col, ?\t)
assert_equal(str + "\n", tsv2csv(buf))
end
+
+ str = CSV.generate_line(['a', 'b'], nil, ?|)
+ assert_equal('a,b', str)
+
+ str = CSV.generate_line(['a', 'b'], nil, "a")
+ assert_equal('"a",b', str)
end
def test_s_generate_row
@@ -818,6 +824,15 @@ public
assert_equal(col, row)
end
+ row = CSV.parse_line("a,b,c", nil, nil)
+ assert_equal(['a', 'b', 'c'], row)
+
+ row = CSV.parse_line("a,b,c", nil, ?b)
+ assert_equal(['a', nil], row)
+
+ row = CSV.parse_line("a,b,c", nil, "c")
+ assert_equal(['a', 'b', nil], row)
+
# Illegal format.
buf = []
row = CSV.parse_line("a,b,\"c\"\ra")
@@ -923,6 +938,18 @@ public
assert_equal(col, buf, str)
end
+ buf = []
+ CSV.parse_row("a,b,c", 0, buf, nil, nil)
+ assert_equal(['a', 'b', 'c'], buf)
+
+ buf = []
+ CSV.parse_row("a,b,c", 0, buf, nil, ?b)
+ assert_equal(['a', nil], buf)
+
+ buf = []
+ CSV.parse_row("a,b,c", 0, buf, nil, "c")
+ assert_equal(['a', 'b', nil], buf)
+
buf = Array.new
cols, idx = CSV.parse_row("a,b,\"c\r\"", 0, buf)
assert_equal(["a", "b", "c\r"], buf.to_a)
@@ -1577,4 +1604,130 @@ public
end
assert_equal(csvStrTerminated, buf)
end
+
+ def test_writer_fs_rs_generate
+ buf = ''
+ CSV::Writer.generate(buf, ",,") do |writer|
+ writer << []
+ end
+ assert_equal("\n", buf)
+
+ buf = ''
+ CSV::Writer.generate(buf, ",,") do |writer|
+ writer << [] << []
+ end
+ assert_equal("\n\n", buf)
+
+ buf = ''
+ CSV::Writer.generate(buf, ",,") do |writer|
+ writer << [1]
+ end
+ assert_equal("1\n", buf)
+
+ buf = ''
+ CSV::Writer.generate(buf, ",,") do |writer|
+ writer << [1, 2, 3]
+ writer << [4, ",,", 5]
+ end
+ assert_equal("1,,2,,3\n4,,\",,\",,5\n", buf)
+
+ buf = ''
+ CSV::Writer.generate(buf, ",,:", ",,;") do |writer|
+ writer << [nil, nil, nil]
+ writer << [nil, ",,", nil]
+ end
+ assert_equal(",,:,,:,,;,,:,,,,:,,;", buf)
+
+ buf = ''
+ CSV::Writer.generate(buf, "---") do |writer|
+ writer << [1, 2, 3]
+ writer << [4, "---\"---", 5]
+ end
+ assert_equal("1---2---3\n4---\"---\"\"---\"---5\n", buf)
+
+ buf = ''
+ CSV::Writer.generate(buf, nil) do |writer|
+ writer << [1, 2, 3]
+ writer << [4, ",\",", 5]
+ end
+ assert_equal("1,2,3\n4,\",\"\",\",5\n", buf)
+ end
+
+ def test_writer_fs_rs_parse
+ reader = CSV::Reader.create('a||b--c||d', '||', '--')
+ assert_equal(['a', 'b'], reader.shift)
+ assert_equal(['c', 'd'], reader.shift)
+
+ reader = CSV::Reader.create("a@|b@-c@|d", "@|", "@-")
+ assert_equal(['a', 'b'], reader.shift)
+ assert_equal(['c', 'd'], reader.shift)
+
+ reader = CSV::Reader.create("ababfsababrs", "abfs", "abrs")
+ assert_equal(['ab', 'ab'], reader.shift)
+
+ reader = CSV::Reader.create('"ab"abfsababrs', "abfs", "abrs")
+ assert_equal(['ab', 'ab'], reader.shift)
+
+ reader = CSV::Reader.create('"ab"aabfsababrs', "abfs", "abrs")
+ assert_raises(CSV::IllegalFormatError) do
+ reader.shift
+ end
+
+ # fs match while matching rs progress
+ reader = CSV::Reader.create("ab,ababrs", nil, "abrs")
+ assert_equal(['ab', 'ab'], reader.shift)
+
+ reader = CSV::Reader.create(',ababrs', nil, "abrs")
+ assert_equal([nil, 'ab'], reader.shift)
+
+ reader = CSV::Reader.create('"",ababrs', nil, "abrs")
+ assert_equal(['', 'ab'], reader.shift)
+
+ reader = CSV::Reader.create('ab,"ab"abrs', nil, "abrs")
+ assert_equal(['ab', 'ab'], reader.shift)
+
+ reader = CSV::Reader.create('ab,"ab"aabrs', nil, "abrs")
+ assert_raises(CSV::IllegalFormatError) do
+ reader.shift
+ end
+
+ # rs match while matching fs progress
+ reader = CSV::Reader.create("ab|abc", 'ab-', "ab|")
+ assert_equal([nil], reader.shift)
+ assert_equal(['abc'], reader.shift)
+
+ # EOF while fs/rs matching
+ reader = CSV::Reader.create("ab", 'ab-', "xyz")
+ assert_equal(['ab'], reader.shift)
+
+ reader = CSV::Reader.create("ab", 'xyz', "ab|")
+ assert_equal(['ab'], reader.shift)
+
+ reader = CSV::Reader.create("ab", 'ab-', "ab|")
+ assert_equal(['ab'], reader.shift)
+
+ reader = CSV::Reader.create(",,:,,:,,;,,:,,,,:,,;", ",,:", ",,;")
+ assert_equal([nil, nil, nil], reader.shift)
+ assert_equal([nil, ",,", nil], reader.shift)
+ end
+
+ def test_foreach
+ File.open(@outfile, "w") do |f|
+ f << "1,2,3\n4,5,6"
+ end
+ row = []
+ CSV.foreach(@outfile) { |line|
+ row << line
+ }
+ assert_equal([['1', '2', '3'], ['4', '5', '6']], row)
+
+ File.open(@outfile, "w") do |f|
+ f << "1,2,3\r4,5,6"
+ end
+ row = []
+ CSV.foreach(@outfile, "\r") { |line|
+ row << line
+ }
+ assert_equal([['1', '2', '3'], ['4', '5', '6']], row)
+ end
end
diff --git a/version.h b/version.h
index 8c0ef35bde..ce02c28dd9 100644
--- a/version.h
+++ b/version.h
@@ -1,11 +1,11 @@
#define RUBY_VERSION "1.9.0"
-#define RUBY_RELEASE_DATE "2004-05-20"
+#define RUBY_RELEASE_DATE "2004-05-21"
#define RUBY_VERSION_CODE 190
-#define RUBY_RELEASE_CODE 20040520
+#define RUBY_RELEASE_CODE 20040521
#define RUBY_VERSION_MAJOR 1
#define RUBY_VERSION_MINOR 9
#define RUBY_VERSION_TEENY 0
#define RUBY_RELEASE_YEAR 2004
#define RUBY_RELEASE_MONTH 5
-#define RUBY_RELEASE_DAY 20
+#define RUBY_RELEASE_DAY 21