diff options
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | lib/csv.rb | 42 | ||||
-rwxr-xr-x | test/csv/test_encodings.rb | 18 |
3 files changed, 25 insertions, 40 deletions
@@ -1,4 +1,7 @@ -Sat Dec 25 16:04:34 2010 Nobuyoshi Nakada <nobu@ruby-lang.org> +Sat Dec 25 17:32:24 2010 Nobuyoshi Nakada <nobu@ruby-lang.org> + + * lib/csv.rb (CSV#init_separators): use IO#gets with length + parameter to get rid of wrong convertion. * lib/csv.rb (CSV::foreach, CSV#initialize): directly use encoding diff --git a/lib/csv.rb b/lib/csv.rb index 092424e33c..278abc1eea 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -1573,10 +1573,7 @@ class CSV # if we can transcode the needed characters # @re_esc = "\\".encode(@encoding) rescue "" - @re_chars = %w[ \\ . [ ] - ^ $ ? - * + { } ( ) | # - \ \r \n \t \f \v ]. - map { |s| s.encode(@encoding) rescue nil }.compact + @re_chars = /#{%"[-][\\.^$?*+{}()|# \r\n\t\f\v]".encode(@encoding, fallback: proc{""})}/ init_separators(options) init_parsers(options) @@ -2025,15 +2022,13 @@ class CSV # if we run out of data, it's probably a single line # (use a sensible default) # - if @io.eof? + unless sample = @io.gets(nil, 1024) @row_sep = $INPUT_RECORD_SEPARATOR break end # read ahead a bit - sample = read_to_char(1024) - sample += read_to_char(1) if sample[-1..-1] == encode_str("\r") and - not @io.eof? + sample << (@io.gets(nil, 1) || "") if sample.end_with?(encode_str("\r")) # try to find a standard separator if sample =~ encode_re("\r\n?|\n") @row_sep = $& @@ -2267,7 +2262,7 @@ class CSV # a backslash cannot be transcoded. # def escape_re(str) - str.chars.map { |c| @re_chars.include?(c) ? @re_esc + c : c }.join('') + str.gsub(@re_chars) {|c| @re_esc + c} end # @@ -2286,31 +2281,6 @@ class CSV chunks.map { |chunk| chunk.encode(@encoding.name) }.join('') end - # - # Reads at least +bytes+ from <tt>@io</tt>, but will read up 10 bytes ahead if - # needed to ensure the data read is valid in the ecoding of that data. This - # should ensure that it is safe to use regular expressions on the read data, - # unless it is actually a broken encoding. The read data will be returned in - # <tt>@encoding</tt>. - # - def read_to_char(bytes) - return "" if @io.eof? - data = read_io(bytes) - begin - raise unless data.valid_encoding? - encoded = encode_str(data) - raise unless encoded.valid_encoding? - return encoded - rescue # encoding error or my invalid data raise - if @io.eof? or data.size >= bytes + 10 - return data - else - data += read_io(1) - retry - end - end - end - private def raw_encoding @@ -2324,10 +2294,6 @@ class CSV Encoding::ASCII_8BIT end end - - def read_io(bytes) - @io.read(bytes).force_encoding(raw_encoding) - end end # Another name for CSV::instance(). diff --git a/test/csv/test_encodings.rb b/test/csv/test_encodings.rb index 0f2ec127c5..59f43b1be9 100755 --- a/test/csv/test_encodings.rb +++ b/test/csv/test_encodings.rb @@ -238,12 +238,28 @@ class TestCSV::Encodings < TestCSV def assert_parses(fields, encoding, options = { }) encoding = Encoding.find(encoding) unless encoding.is_a? Encoding + orig_fields = fields fields = encode_ary(fields, encoding) - parsed = CSV.parse(ary_to_data(fields, options), options) + data = ary_to_data(fields, options) + parsed = CSV.parse(data, options) assert_equal(fields, parsed) parsed.flatten.each_with_index do |field, i| assert_equal(encoding, field.encoding, "Field[#{i + 1}] was transcoded.") end + File.open(@temp_csv_path, "wb") {|f| f.print(data)} + CSV.open(@temp_csv_path, "rb:#{encoding}", options) do |csv| + csv.each_with_index do |row, i| + assert_equal(fields[i], row) + end + end + begin + CSV.open(@temp_csv_path, "rb:#{encoding}:#{__ENCODING__}", options) do |csv| + csv.each_with_index do |row, i| + assert_equal(orig_fields[i], row) + end + end unless encoding == __ENCODING__ + rescue Encoding::ConverterNotFoundError + end end def encode_ary(ary, encoding) |