summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjeg2 <jeg2@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-10-25 00:54:38 +0000
committerjeg2 <jeg2@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-10-25 00:54:38 +0000
commitdfda5877ca1f224b49ed17af1d44781fb2717a31 (patch)
tree02ba7391940c38967d801e8804939a1a20022982
parent7222e82a8b98afdc61b9cf6ad7419068429a42e9 (diff)
* lib/csv.rb: Fixed a bug in read_to_char() that would slurp
whole files if the encoding was invalid. It will now read up to 10 bytes ahead to find a valid character boundary or give up. [ruby-core:19465] * test/csv/test_features.rb, test/csv/test_table.rb, test/csv/test_row.rb: Loosened some tests to check for a compatible? Encoding instead of an exact Encoding. [ruby-core:19470] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19931 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog10
-rw-r--r--lib/csv.rb19
-rw-r--r--test/csv/test_features.rb6
-rw-r--r--test/csv/test_row.rb6
-rw-r--r--test/csv/test_table.rb6
5 files changed, 33 insertions, 14 deletions
diff --git a/ChangeLog b/ChangeLog
index 38445d0870..69599e5076 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Sat Oct 25 09:54:10 2008 James Edward Gray II <jeg2@ruby-lang.org>
+
+ * lib/csv.rb: Fixed a bug in read_to_char() that would slurp
+ whole files if the encoding was invalid. It will now read
+ up to 10 bytes ahead to find a valid character boundary or
+ give up. [ruby-core:19465]
+ * test/csv/test_features.rb, test/csv/test_table.rb, test/csv/test_row.rb:
+ Loosened some tests to check for a compatible? Encoding instead
+ of an exact Encoding. [ruby-core:19470]
+
Sat Oct 25 07:42:49 2008 Eric Hodel <drbrain@segment7.net>
* lib/rdoc*: Update to RDoc 2.2.2 r192.
diff --git a/lib/csv.rb b/lib/csv.rb
index a9d9dbae97..6fb5da4d79 100644
--- a/lib/csv.rb
+++ b/lib/csv.rb
@@ -199,7 +199,7 @@ require "stringio"
#
class CSV
# The version of the installed library.
- VERSION = "2.4.3".freeze
+ VERSION = "2.4.4".freeze
#
# A CSV::Row is part Array and part Hash. It retains an order for the fields
@@ -1551,7 +1551,7 @@ class CSV
end
@encoding ||= Encoding.default_internal || Encoding.default_external
#
- # prepare for build safe regular expressions in the target encoding,
+ # prepare for building safe regular expressions in the target encoding,
# if we can transcode the needed characters
#
@re_esc = "\\".encode(@encoding) rescue ""
@@ -2251,10 +2251,11 @@ class CSV
end
#
- # Reads at least +bytes+ from <tt>@io</tt>, but will read on until the data
- # read is valid in the ecoding of that data. This should ensure that it is
- # safe to use regular expressions on the read data. The read data will be
- # returned in <tt>@encoding</tt>.
+ # Reads at least +bytes+ from <tt>@io</tt>, but will read up to 10 bytes ahead if
+ # needed to ensure the data read is valid in the encoding of that data. This
+ # should ensure that it is safe to use regular expressions on the read data,
+ # unless it is actually a broken encoding. The read data will be returned in
+ # <tt>@encoding</tt>.
#
def read_to_char(bytes)
return "" if @io.eof?
@@ -2264,10 +2265,12 @@ class CSV
raise unless encoded.valid_encoding?
return encoded
rescue # encoding error or my invalid data raise
- if @io.eof?
+ if @io.eof? or data.size >= bytes + 10
return data
else
- data += @io.read(1) until data.valid_encoding? or @io.eof?
+ data += @io.read(1) until data.valid_encoding? or
+ @io.eof? or
+ data.size >= bytes + 10
retry
end
end
diff --git a/test/csv/test_features.rb b/test/csv/test_features.rb
index 88e23d2f16..9ed3f98191 100644
--- a/test/csv/test_features.rb
+++ b/test/csv/test_features.rb
@@ -250,9 +250,11 @@ class TestCSVFeatures < Test::Unit::TestCase
end
end
- def test_inspect_is_ascii_8bit_encoded
+ def test_inspect_encoding_is_ascii_compatible
CSV.new("one,two,three\n1,2,3\n".encode("UTF-16BE")) do |csv|
- assert_equal("ASCII-8BIT", csv.inspect.encoding.name)
+ assert( Encoding.compatible?( Encoding.find("US-ASCII"),
+ csv.inspect.encoding ),
+ "inspect() was not ASCII compatible." )
end
end
diff --git a/test/csv/test_row.rb b/test/csv/test_row.rb
index bd0aad4104..d0b0cdc406 100644
--- a/test/csv/test_row.rb
+++ b/test/csv/test_row.rb
@@ -296,8 +296,10 @@ class TestCSVRow < Test::Unit::TestCase
end
end
- def test_inspect_is_ascii_8bit_encoded
- assert_equal("ASCII-8BIT", @row.inspect.encoding.name)
+ def test_inspect_encoding_is_ascii_compatible
+ assert( Encoding.compatible?( Encoding.find("US-ASCII"),
+ @row.inspect.encoding ),
+ "inspect() was not ASCII compatible." )
end
def test_inspect_shows_symbol_headers_as_bare_attributes
diff --git a/test/csv/test_table.rb b/test/csv/test_table.rb
index 3c3cb1c155..d0b4d10103 100644
--- a/test/csv/test_table.rb
+++ b/test/csv/test_table.rb
@@ -400,7 +400,9 @@ class TestCSVTable < Test::Unit::TestCase
assert(str.include?("mode:#{@table.mode}"), "Mode not shown.")
end
- def test_inspect_is_us_ascii_encoded
- assert_equal("US-ASCII", @table.inspect.encoding.name)
+ def test_inspect_encoding_is_ascii_compatible
+ assert( Encoding.compatible?( Encoding.find("US-ASCII"),
+ @table.inspect.encoding ),
+ "inspect() was not ASCII compatible." )
end
end