diff options
Diffstat (limited to 'spec/ruby/core/encoding/compatible_spec.rb')
| -rw-r--r-- | spec/ruby/core/encoding/compatible_spec.rb | 455 |
1 files changed, 417 insertions, 38 deletions
diff --git a/spec/ruby/core/encoding/compatible_spec.rb b/spec/ruby/core/encoding/compatible_spec.rb index 80ecab6155..31376a3b75 100644 --- a/spec/ruby/core/encoding/compatible_spec.rb +++ b/spec/ruby/core/encoding/compatible_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../spec_helper' @@ -7,7 +7,7 @@ require_relative '../../spec_helper' describe "Encoding.compatible? String, String" do describe "when the first's Encoding is valid US-ASCII" do before :each do - @str = "abc".force_encoding Encoding::US_ASCII + @str = "abc".dup.force_encoding Encoding::US_ASCII end it "returns US-ASCII when the second's is US-ASCII" do @@ -33,28 +33,28 @@ describe "Encoding.compatible? String, String" do describe "when the first's Encoding is ASCII compatible and ASCII only" do it "returns the first's Encoding if the second is ASCII compatible and ASCII only" do - [ [Encoding, "abc".force_encoding("UTF-8"), "123".force_encoding("Shift_JIS"), Encoding::UTF_8], - [Encoding, "123".force_encoding("Shift_JIS"), "abc".force_encoding("UTF-8"), Encoding::Shift_JIS] + [ [Encoding, "abc".dup.force_encoding("UTF-8"), "123".dup.force_encoding("Shift_JIS"), Encoding::UTF_8], + [Encoding, "123".dup.force_encoding("Shift_JIS"), "abc".dup.force_encoding("UTF-8"), Encoding::Shift_JIS] ].should be_computed_by(:compatible?) end it "returns the first's Encoding if the second is ASCII compatible and ASCII only" do - [ [Encoding, "abc".force_encoding("BINARY"), "123".force_encoding("US-ASCII"), Encoding::BINARY], - [Encoding, "123".force_encoding("US-ASCII"), "abc".force_encoding("BINARY"), Encoding::US_ASCII] + [ [Encoding, "abc".dup.force_encoding("BINARY"), "123".dup.force_encoding("US-ASCII"), Encoding::BINARY], + [Encoding, "123".dup.force_encoding("US-ASCII"), "abc".dup.force_encoding("BINARY"), Encoding::US_ASCII] ].should be_computed_by(:compatible?) end it "returns the second's Encoding if the second is ASCII compatible but not ASCII only" do - [ [Encoding, "abc".force_encoding("UTF-8"), "\xff".force_encoding("Shift_JIS"), Encoding::Shift_JIS], - [Encoding, "123".force_encoding("Shift_JIS"), "\xff".force_encoding("UTF-8"), Encoding::UTF_8], - [Encoding, "abc".force_encoding("BINARY"), "\xff".force_encoding("US-ASCII"), Encoding::US_ASCII], - [Encoding, "123".force_encoding("US-ASCII"), "\xff".force_encoding("BINARY"), Encoding::BINARY], + [ [Encoding, "abc".dup.force_encoding("UTF-8"), "\xff".dup.force_encoding("Shift_JIS"), Encoding::Shift_JIS], + [Encoding, "123".dup.force_encoding("Shift_JIS"), "\xff".dup.force_encoding("UTF-8"), Encoding::UTF_8], + [Encoding, "abc".dup.force_encoding("BINARY"), "\xff".dup.force_encoding("US-ASCII"), Encoding::US_ASCII], + [Encoding, "123".dup.force_encoding("US-ASCII"), "\xff".dup.force_encoding("BINARY"), Encoding::BINARY], ].should be_computed_by(:compatible?) end it "returns nil if the second's Encoding is not ASCII compatible" do - a = "abc".force_encoding("UTF-8") - b = "1234".force_encoding("UTF-16LE") + a = "abc".dup.force_encoding("UTF-8") + b = "1234".dup.force_encoding("UTF-16LE") Encoding.compatible?(a, b).should be_nil end end @@ -75,7 +75,7 @@ describe "Encoding.compatible? String, String" do describe "when the first's Encoding is not ASCII compatible" do before :each do - @str = "abc".force_encoding Encoding::UTF_7 + @str = "abc".dup.force_encoding Encoding::UTF_7 end it "returns nil when the second String is US-ASCII" do @@ -91,14 +91,14 @@ describe "Encoding.compatible? String, String" do end it "returns the Encoding when the second's Encoding is not ASCII compatible but the same as the first's Encoding" do - encoding = Encoding.compatible?(@str, "def".force_encoding("utf-7")) + encoding = Encoding.compatible?(@str, "def".dup.force_encoding("utf-7")) encoding.should == Encoding::UTF_7 end end describe "when the first's Encoding is invalid" do before :each do - @str = "\xff".force_encoding Encoding::UTF_8 + @str = "\xff".dup.force_encoding Encoding::UTF_8 end it "returns the first's Encoding when the second's Encoding is US-ASCII" do @@ -114,11 +114,11 @@ describe "Encoding.compatible? String, String" do end it "returns nil when the second's Encoding is invalid and ASCII only" do - Encoding.compatible?(@str, "\x7f".force_encoding("utf-16be")).should be_nil + Encoding.compatible?(@str, "\x7f\x7f".dup.force_encoding("utf-16be")).should be_nil end it "returns nil when the second's Encoding is invalid and not ASCII only" do - Encoding.compatible?(@str, "\xff".force_encoding("utf-16be")).should be_nil + Encoding.compatible?(@str, "\xff\xff".dup.force_encoding("utf-16be")).should be_nil end it "returns the Encoding when the second's Encoding is invalid but the same as the first" do @@ -129,7 +129,7 @@ describe "Encoding.compatible? String, String" do describe "when the first String is empty and the second is not" do describe "and the first's Encoding is ASCII compatible" do before :each do - @str = "".force_encoding("utf-8") + @str = "".dup.force_encoding("utf-8") end it "returns the first's encoding when the second String is ASCII only" do @@ -143,7 +143,7 @@ describe "Encoding.compatible? String, String" do describe "when the first's Encoding is not ASCII compatible" do before :each do - @str = "".force_encoding Encoding::UTF_7 + @str = "".dup.force_encoding Encoding::UTF_7 end it "returns the second string's encoding" do @@ -154,18 +154,391 @@ describe "Encoding.compatible? String, String" do describe "when the second String is empty" do before :each do - @str = "abc".force_encoding("utf-7") + @str = "abc".dup.force_encoding("utf-7") end it "returns the first Encoding" do Encoding.compatible?(@str, "").should == Encoding::UTF_7 end end + + # Encoding negotiation depends on whether encodings are ASCII-compatible, empty + # and contain only ASCII characters (that take 7 bits). Check US-ASCII, UTF-8 and + # BINARY encodings (as most common) as well as an ASCII-compatible, a non-ASCII-compatible and a dummy + # encodings in all possible combinations. + describe "compatibility matrix" do + +# Use the following script to regenerate the matrix: +# +# ``` +# # encoding: binary +# +# ENCODINGS = [ +# "US-ASCII", +# "UTF-8", +# "ASCII-8BIT", +# "ISO-8859-1", # ASCII-compatible +# "UTF-16BE", # non-ASCII-compatible +# "ISO-2022-JP" # dummy +# ] +# +# TYPES = [:empty, :"7bits", :non7bits] +# +# VALUES = { +# empty: "", +# :"7bits" => "\x01\x01", +# non7bits: "\x01\x81" +# } +# +# ENCODINGS.product(TYPES, ENCODINGS, TYPES).each do |encoding1, type1, encoding2, type2| +# value1 = VALUES[type1].dup.force_encoding(encoding1) +# value2 = VALUES[type2].dup.force_encoding(encoding2) +# +# result_encoding = Encoding.compatible?(value1, value2) +# +# puts "[#{encoding1.inspect}, #{value1.inspect}, #{encoding2.inspect}, #{value2.inspect}, #{result_encoding&.name.inspect}]," +# end +# ``` + + matrix = [ + ["US-ASCII", "", "US-ASCII", "", "US-ASCII"], + ["US-ASCII", "", "US-ASCII", "\x01\x01", "US-ASCII"], + ["US-ASCII", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["US-ASCII", "", "UTF-8", "", "US-ASCII"], + ["US-ASCII", "", "UTF-8", "\u0001\u0001", "US-ASCII"], + ["US-ASCII", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["US-ASCII", "", "ASCII-8BIT", "", "US-ASCII"], + ["US-ASCII", "", "ASCII-8BIT", "\x01\x01", "US-ASCII"], + ["US-ASCII", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["US-ASCII", "", "ISO-8859-1", "", "US-ASCII"], + ["US-ASCII", "", "ISO-8859-1", "\x01\x01", "US-ASCII"], + ["US-ASCII", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["US-ASCII", "", "UTF-16BE", "", "US-ASCII"], + ["US-ASCII", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["US-ASCII", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["US-ASCII", "", "ISO-2022-JP", "", "US-ASCII"], + ["US-ASCII", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["US-ASCII", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["US-ASCII", "\x01\x01", "US-ASCII", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "US-ASCII", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x01", "US-ASCII", "\x01\x81", "US-ASCII"], + ["US-ASCII", "\x01\x01", "UTF-8", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "UTF-8", "\u0001\u0001", "US-ASCII"], + ["US-ASCII", "\x01\x01", "UTF-8", "\u0001\x81", "UTF-8"], + ["US-ASCII", "\x01\x01", "ASCII-8BIT", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ASCII-8BIT", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["US-ASCII", "\x01\x01", "ISO-8859-1", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ISO-8859-1", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["US-ASCII", "\x01\x01", "UTF-16BE", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "UTF-16BE", "\u0101", nil], + ["US-ASCII", "\x01\x01", "UTF-16BE", "\u0181", nil], + ["US-ASCII", "\x01\x01", "ISO-2022-JP", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ISO-2022-JP", "\x01\x01", nil], + ["US-ASCII", "\x01\x01", "ISO-2022-JP", "\x01\x81", nil], + ["US-ASCII", "\x01\x81", "US-ASCII", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "US-ASCII", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x81", "US-ASCII", "\x01\x81", "US-ASCII"], + ["US-ASCII", "\x01\x81", "UTF-8", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "UTF-8", "\u0001\u0001", "US-ASCII"], + ["US-ASCII", "\x01\x81", "UTF-8", "\u0001\x81", nil], + ["US-ASCII", "\x01\x81", "ASCII-8BIT", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "ASCII-8BIT", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x81", "ASCII-8BIT", "\x01\x81", nil], + ["US-ASCII", "\x01\x81", "ISO-8859-1", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "ISO-8859-1", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x81", "ISO-8859-1", "\x01\x81", nil], + ["US-ASCII", "\x01\x81", "UTF-16BE", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "UTF-16BE", "\u0101", nil], + ["US-ASCII", "\x01\x81", "UTF-16BE", "\u0181", nil], + ["US-ASCII", "\x01\x81", "ISO-2022-JP", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "ISO-2022-JP", "\x01\x01", nil], + ["US-ASCII", "\x01\x81", "ISO-2022-JP", "\x01\x81", nil], + ["UTF-8", "", "US-ASCII", "", "UTF-8"], + ["UTF-8", "", "US-ASCII", "\x01\x01", "UTF-8"], + ["UTF-8", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["UTF-8", "", "UTF-8", "", "UTF-8"], + ["UTF-8", "", "UTF-8", "\u0001\u0001", "UTF-8"], + ["UTF-8", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["UTF-8", "", "ASCII-8BIT", "", "UTF-8"], + ["UTF-8", "", "ASCII-8BIT", "\x01\x01", "UTF-8"], + ["UTF-8", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["UTF-8", "", "ISO-8859-1", "", "UTF-8"], + ["UTF-8", "", "ISO-8859-1", "\x01\x01", "UTF-8"], + ["UTF-8", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["UTF-8", "", "UTF-16BE", "", "UTF-8"], + ["UTF-8", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["UTF-8", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["UTF-8", "", "ISO-2022-JP", "", "UTF-8"], + ["UTF-8", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["UTF-8", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["UTF-8", "\u0001\u0001", "US-ASCII", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "US-ASCII", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\u0001", "US-ASCII", "\x01\x81", "US-ASCII"], + ["UTF-8", "\u0001\u0001", "UTF-8", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "UTF-8", "\u0001\u0001", "UTF-8"], + ["UTF-8", "\u0001\u0001", "UTF-8", "\u0001\x81", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ASCII-8BIT", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ASCII-8BIT", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["UTF-8", "\u0001\u0001", "ISO-8859-1", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ISO-8859-1", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["UTF-8", "\u0001\u0001", "UTF-16BE", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "UTF-16BE", "\u0101", nil], + ["UTF-8", "\u0001\u0001", "UTF-16BE", "\u0181", nil], + ["UTF-8", "\u0001\u0001", "ISO-2022-JP", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ISO-2022-JP", "\x01\x01", nil], + ["UTF-8", "\u0001\u0001", "ISO-2022-JP", "\x01\x81", nil], + ["UTF-8", "\u0001\x81", "US-ASCII", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "US-ASCII", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\x81", "US-ASCII", "\x01\x81", nil], + ["UTF-8", "\u0001\x81", "UTF-8", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "UTF-8", "\u0001\u0001", "UTF-8"], + ["UTF-8", "\u0001\x81", "UTF-8", "\u0001\x81", "UTF-8"], + ["UTF-8", "\u0001\x81", "ASCII-8BIT", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "ASCII-8BIT", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\x81", "ASCII-8BIT", "\x01\x81", nil], + ["UTF-8", "\u0001\x81", "ISO-8859-1", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "ISO-8859-1", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\x81", "ISO-8859-1", "\x01\x81", nil], + ["UTF-8", "\u0001\x81", "UTF-16BE", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "UTF-16BE", "\u0101", nil], + ["UTF-8", "\u0001\x81", "UTF-16BE", "\u0181", nil], + ["UTF-8", "\u0001\x81", "ISO-2022-JP", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "ISO-2022-JP", "\x01\x01", nil], + ["UTF-8", "\u0001\x81", "ISO-2022-JP", "\x01\x81", nil], + ["ASCII-8BIT", "", "US-ASCII", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "US-ASCII", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ASCII-8BIT", "", "UTF-8", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "UTF-8", "\u0001\u0001", "ASCII-8BIT"], + ["ASCII-8BIT", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["ASCII-8BIT", "", "ASCII-8BIT", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ISO-8859-1", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ISO-8859-1", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ASCII-8BIT", "", "UTF-16BE", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["ASCII-8BIT", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["ASCII-8BIT", "", "ISO-2022-JP", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ASCII-8BIT", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["ASCII-8BIT", "\x01\x01", "US-ASCII", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "US-ASCII", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ASCII-8BIT", "\x01\x01", "UTF-8", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "UTF-8", "\u0001\u0001", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "UTF-8", "\u0001\x81", "UTF-8"], + ["ASCII-8BIT", "\x01\x01", "ASCII-8BIT", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ISO-8859-1", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ISO-8859-1", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ASCII-8BIT", "\x01\x01", "UTF-16BE", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "UTF-16BE", "\u0101", nil], + ["ASCII-8BIT", "\x01\x01", "UTF-16BE", "\u0181", nil], + ["ASCII-8BIT", "\x01\x01", "ISO-2022-JP", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ISO-2022-JP", "\x01\x01", nil], + ["ASCII-8BIT", "\x01\x01", "ISO-2022-JP", "\x01\x81", nil], + ["ASCII-8BIT", "\x01\x81", "US-ASCII", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "US-ASCII", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "US-ASCII", "\x01\x81", nil], + ["ASCII-8BIT", "\x01\x81", "UTF-8", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "UTF-8", "\u0001\u0001", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "UTF-8", "\u0001\x81", nil], + ["ASCII-8BIT", "\x01\x81", "ASCII-8BIT", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ISO-8859-1", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ISO-8859-1", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ISO-8859-1", "\x01\x81", nil], + ["ASCII-8BIT", "\x01\x81", "UTF-16BE", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "UTF-16BE", "\u0101", nil], + ["ASCII-8BIT", "\x01\x81", "UTF-16BE", "\u0181", nil], + ["ASCII-8BIT", "\x01\x81", "ISO-2022-JP", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ISO-2022-JP", "\x01\x01", nil], + ["ASCII-8BIT", "\x01\x81", "ISO-2022-JP", "\x01\x81", nil], + ["ISO-8859-1", "", "US-ASCII", "", "ISO-8859-1"], + ["ISO-8859-1", "", "US-ASCII", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ISO-8859-1", "", "UTF-8", "", "ISO-8859-1"], + ["ISO-8859-1", "", "UTF-8", "\u0001\u0001", "ISO-8859-1"], + ["ISO-8859-1", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["ISO-8859-1", "", "ASCII-8BIT", "", "ISO-8859-1"], + ["ISO-8859-1", "", "ASCII-8BIT", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ISO-8859-1", "", "ISO-8859-1", "", "ISO-8859-1"], + ["ISO-8859-1", "", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ISO-8859-1", "", "UTF-16BE", "", "ISO-8859-1"], + ["ISO-8859-1", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["ISO-8859-1", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["ISO-8859-1", "", "ISO-2022-JP", "", "ISO-8859-1"], + ["ISO-8859-1", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ISO-8859-1", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["ISO-8859-1", "\x01\x01", "US-ASCII", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "US-ASCII", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ISO-8859-1", "\x01\x01", "UTF-8", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "UTF-8", "\u0001\u0001", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "UTF-8", "\u0001\x81", "UTF-8"], + ["ISO-8859-1", "\x01\x01", "ASCII-8BIT", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ASCII-8BIT", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ISO-8859-1", "\x01\x01", "ISO-8859-1", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "UTF-16BE", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "UTF-16BE", "\u0101", nil], + ["ISO-8859-1", "\x01\x01", "UTF-16BE", "\u0181", nil], + ["ISO-8859-1", "\x01\x01", "ISO-2022-JP", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ISO-2022-JP", "\x01\x01", nil], + ["ISO-8859-1", "\x01\x01", "ISO-2022-JP", "\x01\x81", nil], + ["ISO-8859-1", "\x01\x81", "US-ASCII", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "US-ASCII", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "US-ASCII", "\x01\x81", nil], + ["ISO-8859-1", "\x01\x81", "UTF-8", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "UTF-8", "\u0001\u0001", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "UTF-8", "\u0001\x81", nil], + ["ISO-8859-1", "\x01\x81", "ASCII-8BIT", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ASCII-8BIT", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ASCII-8BIT", "\x01\x81", nil], + ["ISO-8859-1", "\x01\x81", "ISO-8859-1", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "UTF-16BE", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "UTF-16BE", "\u0101", nil], + ["ISO-8859-1", "\x01\x81", "UTF-16BE", "\u0181", nil], + ["ISO-8859-1", "\x01\x81", "ISO-2022-JP", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ISO-2022-JP", "\x01\x01", nil], + ["ISO-8859-1", "\x01\x81", "ISO-2022-JP", "\x01\x81", nil], + ["UTF-16BE", "", "US-ASCII", "", "UTF-16BE"], + ["UTF-16BE", "", "US-ASCII", "\x01\x01", "US-ASCII"], + ["UTF-16BE", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["UTF-16BE", "", "UTF-8", "", "UTF-16BE"], + ["UTF-16BE", "", "UTF-8", "\u0001\u0001", "UTF-8"], + ["UTF-16BE", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["UTF-16BE", "", "ASCII-8BIT", "", "UTF-16BE"], + ["UTF-16BE", "", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["UTF-16BE", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["UTF-16BE", "", "ISO-8859-1", "", "UTF-16BE"], + ["UTF-16BE", "", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["UTF-16BE", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["UTF-16BE", "", "UTF-16BE", "", "UTF-16BE"], + ["UTF-16BE", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["UTF-16BE", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["UTF-16BE", "", "ISO-2022-JP", "", "UTF-16BE"], + ["UTF-16BE", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["UTF-16BE", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["UTF-16BE", "\u0101", "US-ASCII", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "US-ASCII", "\x01\x01", nil], + ["UTF-16BE", "\u0101", "US-ASCII", "\x01\x81", nil], + ["UTF-16BE", "\u0101", "UTF-8", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "UTF-8", "\u0001\u0001", nil], + ["UTF-16BE", "\u0101", "UTF-8", "\u0001\x81", nil], + ["UTF-16BE", "\u0101", "ASCII-8BIT", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "ASCII-8BIT", "\x01\x01", nil], + ["UTF-16BE", "\u0101", "ASCII-8BIT", "\x01\x81", nil], + ["UTF-16BE", "\u0101", "ISO-8859-1", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "ISO-8859-1", "\x01\x01", nil], + ["UTF-16BE", "\u0101", "ISO-8859-1", "\x01\x81", nil], + ["UTF-16BE", "\u0101", "UTF-16BE", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "UTF-16BE", "\u0101", "UTF-16BE"], + ["UTF-16BE", "\u0101", "UTF-16BE", "\u0181", "UTF-16BE"], + ["UTF-16BE", "\u0101", "ISO-2022-JP", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "ISO-2022-JP", "\x01\x01", nil], + ["UTF-16BE", "\u0101", "ISO-2022-JP", "\x01\x81", nil], + ["UTF-16BE", "\u0181", "US-ASCII", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "US-ASCII", "\x01\x01", nil], + ["UTF-16BE", "\u0181", "US-ASCII", "\x01\x81", nil], + ["UTF-16BE", "\u0181", "UTF-8", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "UTF-8", "\u0001\u0001", nil], + ["UTF-16BE", "\u0181", "UTF-8", "\u0001\x81", nil], + ["UTF-16BE", "\u0181", "ASCII-8BIT", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "ASCII-8BIT", "\x01\x01", nil], + ["UTF-16BE", "\u0181", "ASCII-8BIT", "\x01\x81", nil], + ["UTF-16BE", "\u0181", "ISO-8859-1", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "ISO-8859-1", "\x01\x01", nil], + ["UTF-16BE", "\u0181", "ISO-8859-1", "\x01\x81", nil], + ["UTF-16BE", "\u0181", "UTF-16BE", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "UTF-16BE", "\u0101", "UTF-16BE"], + ["UTF-16BE", "\u0181", "UTF-16BE", "\u0181", "UTF-16BE"], + ["UTF-16BE", "\u0181", "ISO-2022-JP", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "ISO-2022-JP", "\x01\x01", nil], + ["UTF-16BE", "\u0181", "ISO-2022-JP", "\x01\x81", nil], + ["ISO-2022-JP", "", "US-ASCII", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "US-ASCII", "\x01\x01", "US-ASCII"], + ["ISO-2022-JP", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ISO-2022-JP", "", "UTF-8", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "UTF-8", "\u0001\u0001", "UTF-8"], + ["ISO-2022-JP", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["ISO-2022-JP", "", "ASCII-8BIT", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["ISO-2022-JP", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ISO-2022-JP", "", "ISO-8859-1", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["ISO-2022-JP", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ISO-2022-JP", "", "UTF-16BE", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["ISO-2022-JP", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["ISO-2022-JP", "", "ISO-2022-JP", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ISO-2022-JP", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "US-ASCII", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "US-ASCII", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x01", "US-ASCII", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x01", "UTF-8", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "UTF-8", "\u0001\u0001", nil], + ["ISO-2022-JP", "\x01\x01", "UTF-8", "\u0001\x81", nil], + ["ISO-2022-JP", "\x01\x01", "ASCII-8BIT", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "ASCII-8BIT", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x01", "ASCII-8BIT", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x01", "ISO-8859-1", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "ISO-8859-1", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x01", "ISO-8859-1", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x01", "UTF-16BE", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "UTF-16BE", "\u0101", nil], + ["ISO-2022-JP", "\x01\x01", "UTF-16BE", "\u0181", nil], + ["ISO-2022-JP", "\x01\x01", "ISO-2022-JP", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "US-ASCII", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "US-ASCII", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x81", "US-ASCII", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x81", "UTF-8", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "UTF-8", "\u0001\u0001", nil], + ["ISO-2022-JP", "\x01\x81", "UTF-8", "\u0001\x81", nil], + ["ISO-2022-JP", "\x01\x81", "ASCII-8BIT", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "ASCII-8BIT", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x81", "ASCII-8BIT", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x81", "ISO-8859-1", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "ISO-8859-1", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x81", "ISO-8859-1", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x81", "UTF-16BE", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "UTF-16BE", "\u0101", nil], + ["ISO-2022-JP", "\x01\x81", "UTF-16BE", "\u0181", nil], + ["ISO-2022-JP", "\x01\x81", "ISO-2022-JP", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ] + + matrix.each do |encoding1, value1, encoding2, value2, compatible_encoding| + it "returns #{compatible_encoding} for #{value1.inspect} in #{encoding1} and #{value2.inspect} in #{encoding2}" do + actual_encoding = Encoding.compatible?(value1.dup.force_encoding(encoding1), value2.dup.force_encoding(encoding2)) + actual_encoding&.name.should == compatible_encoding + end + end + end end describe "Encoding.compatible? String, Regexp" do it "returns US-ASCII if both are US-ASCII" do - str = "abc".force_encoding("us-ascii") + str = "abc".dup.force_encoding("us-ascii") Encoding.compatible?(str, /abc/).should == Encoding::US_ASCII end @@ -180,15 +553,15 @@ describe "Encoding.compatible? String, Regexp" do it "returns the String's Encoding if the String is not ASCII only" do [ [Encoding, "\xff", Encoding::BINARY], [Encoding, "\u3042".encode("utf-8"), Encoding::UTF_8], - [Encoding, "\xa4\xa2".force_encoding("euc-jp"), Encoding::EUC_JP], - [Encoding, "\x82\xa0".force_encoding("shift_jis"), Encoding::Shift_JIS], + [Encoding, "\xa4\xa2".dup.force_encoding("euc-jp"), Encoding::EUC_JP], + [Encoding, "\x82\xa0".dup.force_encoding("shift_jis"), Encoding::Shift_JIS], ].should be_computed_by(:compatible?, /abc/) end end describe "Encoding.compatible? String, Symbol" do it "returns US-ASCII if both are ASCII only" do - str = "abc".force_encoding("us-ascii") + str = "abc".dup.force_encoding("us-ascii") Encoding.compatible?(str, :abc).should == Encoding::US_ASCII end @@ -203,8 +576,8 @@ describe "Encoding.compatible? String, Symbol" do it "returns the String's Encoding if the String is not ASCII only" do [ [Encoding, "\xff", Encoding::BINARY], [Encoding, "\u3042".encode("utf-8"), Encoding::UTF_8], - [Encoding, "\xa4\xa2".force_encoding("euc-jp"), Encoding::EUC_JP], - [Encoding, "\x82\xa0".force_encoding("shift_jis"), Encoding::Shift_JIS], + [Encoding, "\xa4\xa2".dup.force_encoding("euc-jp"), Encoding::EUC_JP], + [Encoding, "\x82\xa0".dup.force_encoding("shift_jis"), Encoding::Shift_JIS], ].should be_computed_by(:compatible?, :abc) end end @@ -221,8 +594,8 @@ describe "Encoding.compatible? String, Encoding" do it "returns the String's encoding if the Encoding is US-ASCII" do [ [Encoding, "\xff", Encoding::BINARY], [Encoding, "\u3042".encode("utf-8"), Encoding::UTF_8], - [Encoding, "\xa4\xa2".force_encoding("euc-jp"), Encoding::EUC_JP], - [Encoding, "\x82\xa0".force_encoding("shift_jis"), Encoding::Shift_JIS], + [Encoding, "\xa4\xa2".dup.force_encoding("euc-jp"), Encoding::EUC_JP], + [Encoding, "\x82\xa0".dup.force_encoding("shift_jis"), Encoding::Shift_JIS], ].should be_computed_by(:compatible?, Encoding::US_ASCII) end @@ -242,7 +615,7 @@ end describe "Encoding.compatible? Regexp, String" do it "returns US-ASCII if both are US-ASCII" do - str = "abc".force_encoding("us-ascii") + str = "abc".dup.force_encoding("us-ascii") Encoding.compatible?(/abc/, str).should == Encoding::US_ASCII end @@ -256,8 +629,8 @@ describe "Encoding.compatible? Regexp, Regexp" do it "returns the first's Encoding if it is not US-ASCII and not ASCII only" do [ [Encoding, Regexp.new("\xff"), Encoding::BINARY], [Encoding, Regexp.new("\u3042".encode("utf-8")), Encoding::UTF_8], - [Encoding, Regexp.new("\xa4\xa2".force_encoding("euc-jp")), Encoding::EUC_JP], - [Encoding, Regexp.new("\x82\xa0".force_encoding("shift_jis")), Encoding::Shift_JIS], + [Encoding, Regexp.new("\xa4\xa2".dup.force_encoding("euc-jp")), Encoding::EUC_JP], + [Encoding, Regexp.new("\x82\xa0".dup.force_encoding("shift_jis")), Encoding::Shift_JIS], ].should be_computed_by(:compatible?, /abc/) end end @@ -270,15 +643,15 @@ describe "Encoding.compatible? Regexp, Symbol" do it "returns the first's Encoding if it is not US-ASCII and not ASCII only" do [ [Encoding, Regexp.new("\xff"), Encoding::BINARY], [Encoding, Regexp.new("\u3042".encode("utf-8")), Encoding::UTF_8], - [Encoding, Regexp.new("\xa4\xa2".force_encoding("euc-jp")), Encoding::EUC_JP], - [Encoding, Regexp.new("\x82\xa0".force_encoding("shift_jis")), Encoding::Shift_JIS], + [Encoding, Regexp.new("\xa4\xa2".dup.force_encoding("euc-jp")), Encoding::EUC_JP], + [Encoding, Regexp.new("\x82\xa0".dup.force_encoding("shift_jis")), Encoding::Shift_JIS], ].should be_computed_by(:compatible?, /abc/) end end describe "Encoding.compatible? Symbol, String" do it "returns US-ASCII if both are ASCII only" do - str = "abc".force_encoding("us-ascii") + str = "abc".dup.force_encoding("us-ascii") Encoding.compatible?(str, :abc).should == Encoding::US_ASCII end end @@ -291,8 +664,8 @@ describe "Encoding.compatible? Symbol, Regexp" do it "returns the Regexp's Encoding if it is not US-ASCII and not ASCII only" do a = Regexp.new("\xff") b = Regexp.new("\u3042".encode("utf-8")) - c = Regexp.new("\xa4\xa2".force_encoding("euc-jp")) - d = Regexp.new("\x82\xa0".force_encoding("shift_jis")) + c = Regexp.new("\xa4\xa2".dup.force_encoding("euc-jp")) + d = Regexp.new("\x82\xa0".dup.force_encoding("shift_jis")) [ [Encoding, :abc, a, Encoding::BINARY], [Encoding, :abc, b, Encoding::UTF_8], @@ -310,8 +683,8 @@ describe "Encoding.compatible? Symbol, Symbol" do it "returns the first's Encoding if it is not ASCII only" do [ [Encoding, "\xff".to_sym, Encoding::BINARY], [Encoding, "\u3042".encode("utf-8").to_sym, Encoding::UTF_8], - [Encoding, "\xa4\xa2".force_encoding("euc-jp").to_sym, Encoding::EUC_JP], - [Encoding, "\x82\xa0".force_encoding("shift_jis").to_sym, Encoding::Shift_JIS], + [Encoding, "\xa4\xa2".dup.force_encoding("euc-jp").to_sym, Encoding::EUC_JP], + [Encoding, "\x82\xa0".dup.force_encoding("shift_jis").to_sym, Encoding::Shift_JIS], ].should be_computed_by(:compatible?, :abc) end end @@ -377,3 +750,9 @@ describe "Encoding.compatible? Object, Object" do Encoding.compatible?(:sym, Object.new).should be_nil end end + +describe "Encoding.compatible? nil, nil" do + it "returns nil" do + Encoding.compatible?(nil, nil).should be_nil + end +end |
