* lib/csv.rb: Fixed a bug in read_to_char() that would slurp

whole files if the encoding was invalid. It will now read up to 10 bytes ahead to find a valid character boundary or give up. [ruby-core:19465] * test/csv/test_features.rb, test/csv/test_table.rb, test/csv/test_row.rb: Loosened some tests to check for a compatible? Encoding instead of an exact Encoding. [ruby-core:19470] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19931 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: jeg2 <jeg2@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-10-25 00:54:38 +0000
committer: jeg2 <jeg2@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-10-25 00:54:38 +0000
commit: dfda5877ca1f224b49ed17af1d44781fb2717a31 (patch)
tree: 02ba7391940c38967d801e8804939a1a20022982 /lib/csv.rb
parent: 7222e82a8b98afdc61b9cf6ad7419068429a42e9 (diff)
1 files changed, 11 insertions, 8 deletions
diff --git a/lib/csv.rb b/lib/csv.rb
index a9d9dbae97..6fb5da4d79 100644
--- a/lib/csv.rb
+++ b/lib/csv.rb
@@ -199,7 +199,7 @@ require "stringio"
 # 
 class CSV
   # The version of the installed library.
-  VERSION = "2.4.3".freeze
+  VERSION = "2.4.4".freeze
   
   # 
   # A CSV::Row is part Array and part Hash.  It retains an order for the fields
@@ -1551,7 +1551,7 @@ class CSV
                   end
     @encoding ||= Encoding.default_internal || Encoding.default_external
     # 
-    # prepare for build safe regular expressions in the target encoding,
+    # prepare for building safe regular expressions in the target encoding,
     # if we can transcode the needed characters
     # 
     @re_esc   =   "\\".encode(@encoding) rescue ""
@@ -2251,10 +2251,11 @@ class CSV
   end
 
   # 
-  # Reads at least +bytes+ from <tt>@io</tt>, but will read on until the data
-  # read is valid in the ecoding of that data.  This should ensure that it is
-  # safe to use regular expressions on the read data.  The read data will be
-  # returned in <tt>@encoding</tt>.
+  # Reads at least +bytes+ from <tt>@io</tt>, but will read up to 10 bytes ahead if
+  # needed to ensure the data read is valid in the encoding of that data.  This
+  # should ensure that it is safe to use regular expressions on the read data,
+  # unless it is actually a broken encoding.  The read data will be returned in
+  # <tt>@encoding</tt>.
   # 
   def read_to_char(bytes)
     return "" if @io.eof?
@@ -2264,10 +2265,12 @@ class CSV
       raise unless encoded.valid_encoding?
       return encoded
     rescue  # encoding error or my invalid data raise
-      if @io.eof?
+      if @io.eof? or data.size >= bytes + 10
         return data
       else
-        data += @io.read(1) until data.valid_encoding? or @io.eof?
+        data += @io.read(1) until data.valid_encoding? or
+                                  @io.eof?             or
+                                  data.size >= bytes + 10
         retry
       end
     end
author	jeg2 <jeg2@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2008-10-25 00:54:38 +0000
committer	jeg2 <jeg2@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2008-10-25 00:54:38 +0000
commit	dfda5877ca1f224b49ed17af1d44781fb2717a31 (patch)
tree	02ba7391940c38967d801e8804939a1a20022982 /lib/csv.rb
parent	7222e82a8b98afdc61b9cf6ad7419068429a42e9 (diff)