From ba87ab3a7e05e157ef7d29c9f6dc1e40e1f087d2 Mon Sep 17 00:00:00 2001 From: yugui Date: Thu, 1 Jul 2010 02:05:16 +0000 Subject: merges r28431 and r28432 from trunk into ruby_1_9_2. -- * lib/csv.rb: Fixing a bug that prevented CSV from parsing all multi-line fields correctly. Patch by Rob Biedenharn. -- Fixing a spelling error. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_9_2@28501 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 5 +++++ lib/csv.rb | 10 ++++++++-- test/csv/test_csv_parsing.rb | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9d4e3b1e7b..1bd3c0fab2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Fri Jun 25 11:45:36 2010 James Edward Gray II + + * lib/csv.rb: Fixing a bug that prevented CSV from parsing + all multi-line fields correctly. Patch by Rob Biedenharn. + Sat Jun 26 10:08:36 2010 Nobuyoshi Nakada * test/ruby/envutil.rb (EnvUtil#invoke_ruby): no needs to copy the diff --git a/lib/csv.rb b/lib/csv.rb index d3a295b3a8..8685e3d658 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -198,7 +198,7 @@ require "stringio" # class CSV # The version of the installed library. - VERSION = "2.4.6".freeze + VERSION = "2.4.7".freeze # # A CSV::Row is part Array and part Hash. It retains an order for the fields @@ -1843,7 +1843,13 @@ class CSV end parts = parse.split(@col_sep, -1) - csv << nil if parts.empty? + if parts.empty? + if in_extended_col + csv[-1] << @col_sep # will be replaced with a @row_sep after the parts.each loop + else + csv << nil + end + end # This loop is the hot path of csv parsing. Some things may be non-dry # for a reason. Make sure to benchmark when refactoring. 
diff --git a/test/csv/test_csv_parsing.rb b/test/csv/test_csv_parsing.rb index e3609b7648..c0b8d83f96 100644 --- a/test/csv/test_csv_parsing.rb +++ b/test/csv/test_csv_parsing.rb @@ -115,6 +115,22 @@ class TestCSVParsing < Test::Unit::TestCase assert_equal(Array.new, CSV.parse_line("\n1,2,3\n")) end + def test_rob_edge_cases + [ [%Q{"a\nb"}, ["a\nb"]], + [%Q{"\n\n\n"}, ["\n\n\n"]], + [%Q{a,"b\n\nc"}, ['a', "b\n\nc"]], + [%Q{,"\r\n"}, [nil,"\r\n"]], + [%Q{,"\r\n."}, [nil,"\r\n."]], + [%Q{"a\na","one newline"}, ["a\na", 'one newline']], + [%Q{"a\n\na","two newlines"}, ["a\n\na", 'two newlines']], + [%Q{"a\r\na","one CRLF"}, ["a\r\na", 'one CRLF']], + [%Q{"a\r\n\r\na","two CRLFs"}, ["a\r\n\r\na", 'two CRLFs']], + [%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]], + ].each do |edge_case| + assert_equal(edge_case.last, CSV.parse_line(edge_case.first)) + end + end + def test_non_regex_edge_cases # An early version of the non-regex parser fails this test [ [ "foo,\"foo,bar,baz,foo\",\"foo\"", -- cgit v1.2.3