From 4fcfa85cb6a44b4e8c7a47578b50064711dff404 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Sun, 19 Jul 2020 06:25:05 +0900
Subject: [ruby/csv] CSV.generate_line: use the encoding of the first non ASCII field as the expected encoding

See also: https://github.com/ruby/stringio/issues/13#issuecomment-660543554

https://github.com/ruby/csv/commit/004cf49d18
---
 lib/csv.rb                 | 16 ++++++++++++++--
 test/csv/test_encodings.rb | 12 ++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/lib/csv.rb b/lib/csv.rb
index 83898890c7..63309f3f2e 100644
--- a/lib/csv.rb
+++ b/lib/csv.rb
@@ -1289,8 +1289,20 @@ class CSV
       str = +""
       if options[:encoding]
         str.force_encoding(options[:encoding])
-      elsif field = row.find {|f| f.is_a?(String)}
-        str.force_encoding(field.encoding)
+      else
+        fallback_encoding = nil
+        output_encoding = nil
+        row.each do |field|
+          next unless field.is_a?(String)
+          fallback_encoding ||= field.encoding
+          next if field.ascii_only?
+          output_encoding = field.encoding
+          break
+        end
+        output_encoding ||= fallback_encoding
+        if output_encoding
+          str.force_encoding(output_encoding)
+        end
       end
       (new(str, **options) << row).string
     end
diff --git a/test/csv/test_encodings.rb b/test/csv/test_encodings.rb
index cd63af6a83..8d228c05f3 100644
--- a/test/csv/test_encodings.rb
+++ b/test/csv/test_encodings.rb
@@ -242,6 +242,18 @@ class TestCSVEncodings < Test::Unit::TestCase
     assert_equal("UTF-8", data.to_csv.encoding.name)
   end
 
+  def test_encoding_is_not_upgraded_for_non_ascii_content_during_writing_as_needed
+    data = ["\u00c0".encode("ISO-8859-1"), "\u3042"]
+    assert_equal([
+                   "ISO-8859-1",
+                   "UTF-8",
+                 ],
+                 data.collect {|field| field.encoding.name})
+    assert_raise(Encoding::CompatibilityError) do
+      data.to_csv
+    end
+  end
+
   def test_explicit_encoding
     bug9766 = '[ruby-core:62113] [Bug #9766]'
     s = CSV.generate(encoding: "Windows-31J") do |csv|
-- 
cgit v1.2.3