diff options
Diffstat (limited to 'spec/ruby/core/encoding/compatible_spec.rb')
| -rw-r--r-- | spec/ruby/core/encoding/compatible_spec.rb | 549 |
1 files changed, 471 insertions, 78 deletions
diff --git a/spec/ruby/core/encoding/compatible_spec.rb b/spec/ruby/core/encoding/compatible_spec.rb index 8c2e4d7e09..0d620e5bf3 100644 --- a/spec/ruby/core/encoding/compatible_spec.rb +++ b/spec/ruby/core/encoding/compatible_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: ascii-8bit -*- +# encoding: binary require_relative '../../spec_helper' @@ -7,19 +7,19 @@ require_relative '../../spec_helper' describe "Encoding.compatible? String, String" do describe "when the first's Encoding is valid US-ASCII" do before :each do - @str = "abc".force_encoding Encoding::US_ASCII + @str = "abc".dup.force_encoding Encoding::US_ASCII end it "returns US-ASCII when the second's is US-ASCII" do Encoding.compatible?(@str, "def".encode("us-ascii")).should == Encoding::US_ASCII end - it "returns US-ASCII if the second String is ASCII-8BIT and ASCII only" do + it "returns US-ASCII if the second String is BINARY and ASCII only" do Encoding.compatible?(@str, "\x7f").should == Encoding::US_ASCII end - it "returns ASCII-8BIT if the second String is ASCII-8BIT but not ASCII only" do - Encoding.compatible?(@str, "\xff").should == Encoding::ASCII_8BIT + it "returns BINARY if the second String is BINARY but not ASCII only" do + Encoding.compatible?(@str, "\xff").should == Encoding::BINARY end it "returns US-ASCII if the second String is UTF-8 and ASCII only" do @@ -33,72 +33,72 @@ describe "Encoding.compatible? String, String" do describe "when the first's Encoding is ASCII compatible and ASCII only" do it "returns the first's Encoding if the second is ASCII compatible and ASCII only" do - [ [Encoding, "abc".force_encoding("UTF-8"), "123".force_encoding("Shift_JIS"), Encoding::UTF_8], - [Encoding, "123".force_encoding("Shift_JIS"), "abc".force_encoding("UTF-8"), Encoding::Shift_JIS] + [ [Encoding, "abc".dup.force_encoding("UTF-8"), "123".dup.force_encoding("Shift_JIS"), Encoding::UTF_8], + [Encoding, "123".dup.force_encoding("Shift_JIS"), "abc".dup.force_encoding("UTF-8"), Encoding::Shift_JIS] ].should be_computed_by(:compatible?) end it "returns the first's Encoding if the second is ASCII compatible and ASCII only" do - [ [Encoding, "abc".force_encoding("ASCII-8BIT"), "123".force_encoding("US-ASCII"), Encoding::ASCII_8BIT], - [Encoding, "123".force_encoding("US-ASCII"), "abc".force_encoding("ASCII-8BIT"), Encoding::US_ASCII] + [ [Encoding, "abc".dup.force_encoding("BINARY"), "123".dup.force_encoding("US-ASCII"), Encoding::BINARY], + [Encoding, "123".dup.force_encoding("US-ASCII"), "abc".dup.force_encoding("BINARY"), Encoding::US_ASCII] ].should be_computed_by(:compatible?) end it "returns the second's Encoding if the second is ASCII compatible but not ASCII only" do - [ [Encoding, "abc".force_encoding("UTF-8"), "\xff".force_encoding("Shift_JIS"), Encoding::Shift_JIS], - [Encoding, "123".force_encoding("Shift_JIS"), "\xff".force_encoding("UTF-8"), Encoding::UTF_8], - [Encoding, "abc".force_encoding("ASCII-8BIT"), "\xff".force_encoding("US-ASCII"), Encoding::US_ASCII], - [Encoding, "123".force_encoding("US-ASCII"), "\xff".force_encoding("ASCII-8BIT"), Encoding::ASCII_8BIT], + [ [Encoding, "abc".dup.force_encoding("UTF-8"), "\xff".dup.force_encoding("Shift_JIS"), Encoding::Shift_JIS], + [Encoding, "123".dup.force_encoding("Shift_JIS"), "\xff".dup.force_encoding("UTF-8"), Encoding::UTF_8], + [Encoding, "abc".dup.force_encoding("BINARY"), "\xff".dup.force_encoding("US-ASCII"), Encoding::US_ASCII], + [Encoding, "123".dup.force_encoding("US-ASCII"), "\xff".dup.force_encoding("BINARY"), Encoding::BINARY], ].should be_computed_by(:compatible?) end it "returns nil if the second's Encoding is not ASCII compatible" do - a = "abc".force_encoding("UTF-8") - b = "123".force_encoding("UTF-16LE") - Encoding.compatible?(a, b).should be_nil + a = "abc".dup.force_encoding("UTF-8") + b = "1234".dup.force_encoding("UTF-16LE") + Encoding.compatible?(a, b).should == nil end end describe "when the first's Encoding is ASCII compatible but not ASCII only" do it "returns the first's Encoding if the second's is valid US-ASCII" do - Encoding.compatible?("\xff", "def".encode("us-ascii")).should == Encoding::ASCII_8BIT + Encoding.compatible?("\xff", "def".encode("us-ascii")).should == Encoding::BINARY end it "returns the first's Encoding if the second's is UTF-8 and ASCII only" do - Encoding.compatible?("\xff", "\u{7f}".encode("utf-8")).should == Encoding::ASCII_8BIT + Encoding.compatible?("\xff", "\u{7f}".encode("utf-8")).should == Encoding::BINARY end it "returns nil if the second encoding is ASCII compatible but neither String's encoding is ASCII only" do - Encoding.compatible?("\xff", "\u3042".encode("utf-8")).should be_nil + Encoding.compatible?("\xff", "\u3042".encode("utf-8")).should == nil end end describe "when the first's Encoding is not ASCII compatible" do before :each do - @str = "abc".force_encoding Encoding::UTF_7 + @str = "abc".dup.force_encoding Encoding::UTF_7 end it "returns nil when the second String is US-ASCII" do - Encoding.compatible?(@str, "def".encode("us-ascii")).should be_nil + Encoding.compatible?(@str, "def".encode("us-ascii")).should == nil end - it "returns nil when the second String is ASCII-8BIT and ASCII only" do - Encoding.compatible?(@str, "\x7f").should be_nil + it "returns nil when the second String is BINARY and ASCII only" do + Encoding.compatible?(@str, "\x7f").should == nil end - it "returns nil when the second String is ASCII-8BIT but not ASCII only" do - Encoding.compatible?(@str, "\xff").should be_nil + it "returns nil when the second String is BINARY but not ASCII only" do + Encoding.compatible?(@str, "\xff").should == nil end it "returns the Encoding when the second's Encoding is not ASCII compatible but the same as the first's Encoding" do - encoding = Encoding.compatible?(@str, "def".force_encoding("utf-7")) + encoding = Encoding.compatible?(@str, "def".dup.force_encoding("utf-7")) encoding.should == Encoding::UTF_7 end end describe "when the first's Encoding is invalid" do before :each do - @str = "\xff".force_encoding Encoding::UTF_8 + @str = "\xff".dup.force_encoding Encoding::UTF_8 end it "returns the first's Encoding when the second's Encoding is US-ASCII" do @@ -109,16 +109,16 @@ describe "Encoding.compatible? String, String" do Encoding.compatible?(@str, "\x7f").should == Encoding::UTF_8 end - it "returns nil when the second's Encoding is ASCII-8BIT but not ASCII only" do - Encoding.compatible?(@str, "\xff").should be_nil + it "returns nil when the second's Encoding is BINARY but not ASCII only" do + Encoding.compatible?(@str, "\xff").should == nil end it "returns nil when the second's Encoding is invalid and ASCII only" do - Encoding.compatible?(@str, "\x7f".force_encoding("utf-16be")).should be_nil + Encoding.compatible?(@str, "\x7f\x7f".dup.force_encoding("utf-16be")).should == nil end it "returns nil when the second's Encoding is invalid and not ASCII only" do - Encoding.compatible?(@str, "\xff".force_encoding("utf-16be")).should be_nil + Encoding.compatible?(@str, "\xff\xff".dup.force_encoding("utf-16be")).should == nil end it "returns the Encoding when the second's Encoding is invalid but the same as the first" do @@ -129,7 +129,7 @@ describe "Encoding.compatible? String, String" do describe "when the first String is empty and the second is not" do describe "and the first's Encoding is ASCII compatible" do before :each do - @str = "".force_encoding("utf-8") + @str = "".dup.force_encoding("utf-8") end it "returns the first's encoding when the second String is ASCII only" do @@ -143,7 +143,7 @@ describe "Encoding.compatible? String, String" do describe "when the first's Encoding is not ASCII compatible" do before :each do - @str = "".force_encoding Encoding::UTF_7 + @str = "".dup.force_encoding Encoding::UTF_7 end it "returns the second string's encoding" do @@ -154,23 +154,396 @@ describe "Encoding.compatible? String, String" do describe "when the second String is empty" do before :each do - @str = "abc".force_encoding("utf-7") + @str = "abc".dup.force_encoding("utf-7") end it "returns the first Encoding" do Encoding.compatible?(@str, "").should == Encoding::UTF_7 end end + + # Encoding negotiation depends on whether encodings are ASCII-compatible, empty + # and contain only ASCII characters (that take 7 bits). Check US-ASCII, UTF-8 and + # BINARY encodings (as most common) as well as an ASCII-compatible, a non-ASCII-compatible and a dummy + # encodings in all possible combinations. + describe "compatibility matrix" do + +# Use the following script to regenerate the matrix: +# +# ``` +# # encoding: binary +# +# ENCODINGS = [ +# "US-ASCII", +# "UTF-8", +# "ASCII-8BIT", +# "ISO-8859-1", # ASCII-compatible +# "UTF-16BE", # non-ASCII-compatible +# "ISO-2022-JP" # dummy +# ] +# +# TYPES = [:empty, :"7bits", :non7bits] +# +# VALUES = { +# empty: "", +# :"7bits" => "\x01\x01", +# non7bits: "\x01\x81" +# } +# +# ENCODINGS.product(TYPES, ENCODINGS, TYPES).each do |encoding1, type1, encoding2, type2| +# value1 = VALUES[type1].dup.force_encoding(encoding1) +# value2 = VALUES[type2].dup.force_encoding(encoding2) +# +# result_encoding = Encoding.compatible?(value1, value2) +# +# puts "[#{encoding1.inspect}, #{value1.inspect}, #{encoding2.inspect}, #{value2.inspect}, #{result_encoding&.name.inspect}]," +# end +# ``` + + matrix = [ + ["US-ASCII", "", "US-ASCII", "", "US-ASCII"], + ["US-ASCII", "", "US-ASCII", "\x01\x01", "US-ASCII"], + ["US-ASCII", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["US-ASCII", "", "UTF-8", "", "US-ASCII"], + ["US-ASCII", "", "UTF-8", "\u0001\u0001", "US-ASCII"], + ["US-ASCII", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["US-ASCII", "", "ASCII-8BIT", "", "US-ASCII"], + ["US-ASCII", "", "ASCII-8BIT", "\x01\x01", "US-ASCII"], + ["US-ASCII", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["US-ASCII", "", "ISO-8859-1", "", "US-ASCII"], + ["US-ASCII", "", "ISO-8859-1", "\x01\x01", "US-ASCII"], + ["US-ASCII", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["US-ASCII", "", "UTF-16BE", "", "US-ASCII"], + ["US-ASCII", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["US-ASCII", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["US-ASCII", "", "ISO-2022-JP", "", "US-ASCII"], + ["US-ASCII", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["US-ASCII", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["US-ASCII", "\x01\x01", "US-ASCII", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "US-ASCII", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x01", "US-ASCII", "\x01\x81", "US-ASCII"], + ["US-ASCII", "\x01\x01", "UTF-8", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "UTF-8", "\u0001\u0001", "US-ASCII"], + ["US-ASCII", "\x01\x01", "UTF-8", "\u0001\x81", "UTF-8"], + ["US-ASCII", "\x01\x01", "ASCII-8BIT", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ASCII-8BIT", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["US-ASCII", "\x01\x01", "ISO-8859-1", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ISO-8859-1", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["US-ASCII", "\x01\x01", "UTF-16BE", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "UTF-16BE", "\u0101", nil], + ["US-ASCII", "\x01\x01", "UTF-16BE", "\u0181", nil], + ["US-ASCII", "\x01\x01", "ISO-2022-JP", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ISO-2022-JP", "\x01\x01", nil], + ["US-ASCII", "\x01\x01", "ISO-2022-JP", "\x01\x81", nil], + ["US-ASCII", "\x01\x81", "US-ASCII", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "US-ASCII", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x81", "US-ASCII", "\x01\x81", "US-ASCII"], + ["US-ASCII", "\x01\x81", "UTF-8", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "UTF-8", "\u0001\u0001", "US-ASCII"], + ["US-ASCII", "\x01\x81", "UTF-8", "\u0001\x81", nil], + ["US-ASCII", "\x01\x81", "ASCII-8BIT", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "ASCII-8BIT", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x81", "ASCII-8BIT", "\x01\x81", nil], + ["US-ASCII", "\x01\x81", "ISO-8859-1", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "ISO-8859-1", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x81", "ISO-8859-1", "\x01\x81", nil], + ["US-ASCII", "\x01\x81", "UTF-16BE", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "UTF-16BE", "\u0101", nil], + ["US-ASCII", "\x01\x81", "UTF-16BE", "\u0181", nil], + ["US-ASCII", "\x01\x81", "ISO-2022-JP", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "ISO-2022-JP", "\x01\x01", nil], + ["US-ASCII", "\x01\x81", "ISO-2022-JP", "\x01\x81", nil], + ["UTF-8", "", "US-ASCII", "", "UTF-8"], + ["UTF-8", "", "US-ASCII", "\x01\x01", "UTF-8"], + ["UTF-8", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["UTF-8", "", "UTF-8", "", "UTF-8"], + ["UTF-8", "", "UTF-8", "\u0001\u0001", "UTF-8"], + ["UTF-8", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["UTF-8", "", "ASCII-8BIT", "", "UTF-8"], + ["UTF-8", "", "ASCII-8BIT", "\x01\x01", "UTF-8"], + ["UTF-8", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["UTF-8", "", "ISO-8859-1", "", "UTF-8"], + ["UTF-8", "", "ISO-8859-1", "\x01\x01", "UTF-8"], + ["UTF-8", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["UTF-8", "", "UTF-16BE", "", "UTF-8"], + ["UTF-8", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["UTF-8", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["UTF-8", "", "ISO-2022-JP", "", "UTF-8"], + ["UTF-8", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["UTF-8", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["UTF-8", "\u0001\u0001", "US-ASCII", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "US-ASCII", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\u0001", "US-ASCII", "\x01\x81", "US-ASCII"], + ["UTF-8", "\u0001\u0001", "UTF-8", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "UTF-8", "\u0001\u0001", "UTF-8"], + ["UTF-8", "\u0001\u0001", "UTF-8", "\u0001\x81", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ASCII-8BIT", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ASCII-8BIT", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["UTF-8", "\u0001\u0001", "ISO-8859-1", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ISO-8859-1", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["UTF-8", "\u0001\u0001", "UTF-16BE", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "UTF-16BE", "\u0101", nil], + ["UTF-8", "\u0001\u0001", "UTF-16BE", "\u0181", nil], + ["UTF-8", "\u0001\u0001", "ISO-2022-JP", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ISO-2022-JP", "\x01\x01", nil], + ["UTF-8", "\u0001\u0001", "ISO-2022-JP", "\x01\x81", nil], + ["UTF-8", "\u0001\x81", "US-ASCII", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "US-ASCII", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\x81", "US-ASCII", "\x01\x81", nil], + ["UTF-8", "\u0001\x81", "UTF-8", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "UTF-8", "\u0001\u0001", "UTF-8"], + ["UTF-8", "\u0001\x81", "UTF-8", "\u0001\x81", "UTF-8"], + ["UTF-8", "\u0001\x81", "ASCII-8BIT", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "ASCII-8BIT", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\x81", "ASCII-8BIT", "\x01\x81", nil], + ["UTF-8", "\u0001\x81", "ISO-8859-1", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "ISO-8859-1", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\x81", "ISO-8859-1", "\x01\x81", nil], + ["UTF-8", "\u0001\x81", "UTF-16BE", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "UTF-16BE", "\u0101", nil], + ["UTF-8", "\u0001\x81", "UTF-16BE", "\u0181", nil], + ["UTF-8", "\u0001\x81", "ISO-2022-JP", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "ISO-2022-JP", "\x01\x01", nil], + ["UTF-8", "\u0001\x81", "ISO-2022-JP", "\x01\x81", nil], + ["ASCII-8BIT", "", "US-ASCII", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "US-ASCII", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ASCII-8BIT", "", "UTF-8", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "UTF-8", "\u0001\u0001", "ASCII-8BIT"], + ["ASCII-8BIT", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["ASCII-8BIT", "", "ASCII-8BIT", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ISO-8859-1", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ISO-8859-1", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ASCII-8BIT", "", "UTF-16BE", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["ASCII-8BIT", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["ASCII-8BIT", "", "ISO-2022-JP", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ASCII-8BIT", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["ASCII-8BIT", "\x01\x01", "US-ASCII", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "US-ASCII", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ASCII-8BIT", "\x01\x01", "UTF-8", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "UTF-8", "\u0001\u0001", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "UTF-8", "\u0001\x81", "UTF-8"], + ["ASCII-8BIT", "\x01\x01", "ASCII-8BIT", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ISO-8859-1", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ISO-8859-1", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ASCII-8BIT", "\x01\x01", "UTF-16BE", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "UTF-16BE", "\u0101", nil], + ["ASCII-8BIT", "\x01\x01", "UTF-16BE", "\u0181", nil], + ["ASCII-8BIT", "\x01\x01", "ISO-2022-JP", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ISO-2022-JP", "\x01\x01", nil], + ["ASCII-8BIT", "\x01\x01", "ISO-2022-JP", "\x01\x81", nil], + ["ASCII-8BIT", "\x01\x81", "US-ASCII", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "US-ASCII", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "US-ASCII", "\x01\x81", nil], + ["ASCII-8BIT", "\x01\x81", "UTF-8", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "UTF-8", "\u0001\u0001", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "UTF-8", "\u0001\x81", nil], + ["ASCII-8BIT", "\x01\x81", "ASCII-8BIT", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ISO-8859-1", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ISO-8859-1", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ISO-8859-1", "\x01\x81", nil], + ["ASCII-8BIT", "\x01\x81", "UTF-16BE", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "UTF-16BE", "\u0101", nil], + ["ASCII-8BIT", "\x01\x81", "UTF-16BE", "\u0181", nil], + ["ASCII-8BIT", "\x01\x81", "ISO-2022-JP", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ISO-2022-JP", "\x01\x01", nil], + ["ASCII-8BIT", "\x01\x81", "ISO-2022-JP", "\x01\x81", nil], + ["ISO-8859-1", "", "US-ASCII", "", "ISO-8859-1"], + ["ISO-8859-1", "", "US-ASCII", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ISO-8859-1", "", "UTF-8", "", "ISO-8859-1"], + ["ISO-8859-1", "", "UTF-8", "\u0001\u0001", "ISO-8859-1"], + ["ISO-8859-1", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["ISO-8859-1", "", "ASCII-8BIT", "", "ISO-8859-1"], + ["ISO-8859-1", "", "ASCII-8BIT", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ISO-8859-1", "", "ISO-8859-1", "", "ISO-8859-1"], + ["ISO-8859-1", "", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ISO-8859-1", "", "UTF-16BE", "", "ISO-8859-1"], + ["ISO-8859-1", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["ISO-8859-1", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["ISO-8859-1", "", "ISO-2022-JP", "", "ISO-8859-1"], + ["ISO-8859-1", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ISO-8859-1", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["ISO-8859-1", "\x01\x01", "US-ASCII", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "US-ASCII", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ISO-8859-1", "\x01\x01", "UTF-8", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "UTF-8", "\u0001\u0001", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "UTF-8", "\u0001\x81", "UTF-8"], + ["ISO-8859-1", "\x01\x01", "ASCII-8BIT", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ASCII-8BIT", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ISO-8859-1", "\x01\x01", "ISO-8859-1", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "UTF-16BE", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "UTF-16BE", "\u0101", nil], + ["ISO-8859-1", "\x01\x01", "UTF-16BE", "\u0181", nil], + ["ISO-8859-1", "\x01\x01", "ISO-2022-JP", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ISO-2022-JP", "\x01\x01", nil], + ["ISO-8859-1", "\x01\x01", "ISO-2022-JP", "\x01\x81", nil], + ["ISO-8859-1", "\x01\x81", "US-ASCII", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "US-ASCII", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "US-ASCII", "\x01\x81", nil], + ["ISO-8859-1", "\x01\x81", "UTF-8", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "UTF-8", "\u0001\u0001", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "UTF-8", "\u0001\x81", nil], + ["ISO-8859-1", "\x01\x81", "ASCII-8BIT", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ASCII-8BIT", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ASCII-8BIT", "\x01\x81", nil], + ["ISO-8859-1", "\x01\x81", "ISO-8859-1", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "UTF-16BE", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "UTF-16BE", "\u0101", nil], + ["ISO-8859-1", "\x01\x81", "UTF-16BE", "\u0181", nil], + ["ISO-8859-1", "\x01\x81", "ISO-2022-JP", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ISO-2022-JP", "\x01\x01", nil], + ["ISO-8859-1", "\x01\x81", "ISO-2022-JP", "\x01\x81", nil], + ["UTF-16BE", "", "US-ASCII", "", "UTF-16BE"], + ["UTF-16BE", "", "US-ASCII", "\x01\x01", "US-ASCII"], + ["UTF-16BE", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["UTF-16BE", "", "UTF-8", "", "UTF-16BE"], + ["UTF-16BE", "", "UTF-8", "\u0001\u0001", "UTF-8"], + ["UTF-16BE", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["UTF-16BE", "", "ASCII-8BIT", "", "UTF-16BE"], + ["UTF-16BE", "", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["UTF-16BE", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["UTF-16BE", "", "ISO-8859-1", "", "UTF-16BE"], + ["UTF-16BE", "", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["UTF-16BE", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["UTF-16BE", "", "UTF-16BE", "", "UTF-16BE"], + ["UTF-16BE", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["UTF-16BE", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["UTF-16BE", "", "ISO-2022-JP", "", "UTF-16BE"], + ["UTF-16BE", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["UTF-16BE", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["UTF-16BE", "\u0101", "US-ASCII", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "US-ASCII", "\x01\x01", nil], + ["UTF-16BE", "\u0101", "US-ASCII", "\x01\x81", nil], + ["UTF-16BE", "\u0101", "UTF-8", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "UTF-8", "\u0001\u0001", nil], + ["UTF-16BE", "\u0101", "UTF-8", "\u0001\x81", nil], + ["UTF-16BE", "\u0101", "ASCII-8BIT", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "ASCII-8BIT", "\x01\x01", nil], + ["UTF-16BE", "\u0101", "ASCII-8BIT", "\x01\x81", nil], + ["UTF-16BE", "\u0101", "ISO-8859-1", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "ISO-8859-1", "\x01\x01", nil], + ["UTF-16BE", "\u0101", "ISO-8859-1", "\x01\x81", nil], + ["UTF-16BE", "\u0101", "UTF-16BE", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "UTF-16BE", "\u0101", "UTF-16BE"], + ["UTF-16BE", "\u0101", "UTF-16BE", "\u0181", "UTF-16BE"], + ["UTF-16BE", "\u0101", "ISO-2022-JP", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "ISO-2022-JP", "\x01\x01", nil], + ["UTF-16BE", "\u0101", "ISO-2022-JP", "\x01\x81", nil], + ["UTF-16BE", "\u0181", "US-ASCII", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "US-ASCII", "\x01\x01", nil], + ["UTF-16BE", "\u0181", "US-ASCII", "\x01\x81", nil], + ["UTF-16BE", "\u0181", "UTF-8", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "UTF-8", "\u0001\u0001", nil], + ["UTF-16BE", "\u0181", "UTF-8", "\u0001\x81", nil], + ["UTF-16BE", "\u0181", "ASCII-8BIT", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "ASCII-8BIT", "\x01\x01", nil], + ["UTF-16BE", "\u0181", "ASCII-8BIT", "\x01\x81", nil], + ["UTF-16BE", "\u0181", "ISO-8859-1", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "ISO-8859-1", "\x01\x01", nil], + ["UTF-16BE", "\u0181", "ISO-8859-1", "\x01\x81", nil], + ["UTF-16BE", "\u0181", "UTF-16BE", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "UTF-16BE", "\u0101", "UTF-16BE"], + ["UTF-16BE", "\u0181", "UTF-16BE", "\u0181", "UTF-16BE"], + ["UTF-16BE", "\u0181", "ISO-2022-JP", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "ISO-2022-JP", "\x01\x01", nil], + ["UTF-16BE", "\u0181", "ISO-2022-JP", "\x01\x81", nil], + ["ISO-2022-JP", "", "US-ASCII", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "US-ASCII", "\x01\x01", "US-ASCII"], + ["ISO-2022-JP", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ISO-2022-JP", "", "UTF-8", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "UTF-8", "\u0001\u0001", "UTF-8"], + ["ISO-2022-JP", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["ISO-2022-JP", "", "ASCII-8BIT", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["ISO-2022-JP", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ISO-2022-JP", "", "ISO-8859-1", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["ISO-2022-JP", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ISO-2022-JP", "", "UTF-16BE", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["ISO-2022-JP", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["ISO-2022-JP", "", "ISO-2022-JP", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ISO-2022-JP", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "US-ASCII", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "US-ASCII", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x01", "US-ASCII", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x01", "UTF-8", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "UTF-8", "\u0001\u0001", nil], + ["ISO-2022-JP", "\x01\x01", "UTF-8", "\u0001\x81", nil], + ["ISO-2022-JP", "\x01\x01", "ASCII-8BIT", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "ASCII-8BIT", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x01", "ASCII-8BIT", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x01", "ISO-8859-1", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "ISO-8859-1", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x01", "ISO-8859-1", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x01", "UTF-16BE", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "UTF-16BE", "\u0101", nil], + ["ISO-2022-JP", "\x01\x01", "UTF-16BE", "\u0181", nil], + ["ISO-2022-JP", "\x01\x01", "ISO-2022-JP", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "US-ASCII", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "US-ASCII", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x81", "US-ASCII", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x81", "UTF-8", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "UTF-8", "\u0001\u0001", nil], + ["ISO-2022-JP", "\x01\x81", "UTF-8", "\u0001\x81", nil], + ["ISO-2022-JP", "\x01\x81", "ASCII-8BIT", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "ASCII-8BIT", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x81", "ASCII-8BIT", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x81", "ISO-8859-1", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "ISO-8859-1", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x81", "ISO-8859-1", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x81", "UTF-16BE", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "UTF-16BE", "\u0101", nil], + ["ISO-2022-JP", "\x01\x81", "UTF-16BE", "\u0181", nil], + ["ISO-2022-JP", "\x01\x81", "ISO-2022-JP", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ] + + matrix.each do |encoding1, value1, encoding2, value2, compatible_encoding| + it "returns #{compatible_encoding} for #{value1.inspect} in #{encoding1} and #{value2.inspect} in #{encoding2}" do + actual_encoding = Encoding.compatible?(value1.dup.force_encoding(encoding1), value2.dup.force_encoding(encoding2)) + actual_encoding&.name.should == compatible_encoding + end + end + end end describe "Encoding.compatible? String, Regexp" do it "returns US-ASCII if both are US-ASCII" do - str = "abc".force_encoding("us-ascii") + str = "abc".dup.force_encoding("us-ascii") Encoding.compatible?(str, /abc/).should == Encoding::US_ASCII end it "returns the String's Encoding if it is not US-ASCII but both are ASCII only" do - [ [Encoding, "abc", Encoding::ASCII_8BIT], + [ [Encoding, "abc", Encoding::BINARY], [Encoding, "abc".encode("utf-8"), Encoding::UTF_8], [Encoding, "abc".encode("euc-jp"), Encoding::EUC_JP], [Encoding, "abc".encode("shift_jis"), Encoding::Shift_JIS], @@ -178,22 +551,27 @@ describe "Encoding.compatible? String, Regexp" do end it "returns the String's Encoding if the String is not ASCII only" do - [ [Encoding, "\xff", Encoding::ASCII_8BIT], + [ [Encoding, "\xff", Encoding::BINARY], [Encoding, "\u3042".encode("utf-8"), Encoding::UTF_8], - [Encoding, "\xa4\xa2".force_encoding("euc-jp"), Encoding::EUC_JP], - [Encoding, "\x82\xa0".force_encoding("shift_jis"), Encoding::Shift_JIS], + [Encoding, "\xa4\xa2".dup.force_encoding("euc-jp"), Encoding::EUC_JP], + [Encoding, "\x82\xa0".dup.force_encoding("shift_jis"), Encoding::Shift_JIS], ].should be_computed_by(:compatible?, /abc/) end + + it "returns the Regexp's Encoding if the String is ASCII only and the Regexp is not" do + r = Regexp.new("\xa4\xa2".dup.force_encoding("euc-jp")) + Encoding.compatible?("hello".dup.force_encoding("utf-8"), r).should == Encoding::EUC_JP + end end describe "Encoding.compatible? String, Symbol" do it "returns US-ASCII if both are ASCII only" do - str = "abc".force_encoding("us-ascii") + str = "abc".dup.force_encoding("us-ascii") Encoding.compatible?(str, :abc).should == Encoding::US_ASCII end it "returns the String's Encoding if it is not US-ASCII but both are ASCII only" do - [ [Encoding, "abc", Encoding::ASCII_8BIT], + [ [Encoding, "abc", Encoding::BINARY], [Encoding, "abc".encode("utf-8"), Encoding::UTF_8], [Encoding, "abc".encode("euc-jp"), Encoding::EUC_JP], [Encoding, "abc".encode("shift_jis"), Encoding::Shift_JIS], @@ -201,51 +579,60 @@ describe "Encoding.compatible? String, Symbol" do end it "returns the String's Encoding if the String is not ASCII only" do - [ [Encoding, "\xff", Encoding::ASCII_8BIT], + [ [Encoding, "\xff", Encoding::BINARY], [Encoding, "\u3042".encode("utf-8"), Encoding::UTF_8], - [Encoding, "\xa4\xa2".force_encoding("euc-jp"), Encoding::EUC_JP], - [Encoding, "\x82\xa0".force_encoding("shift_jis"), Encoding::Shift_JIS], + [Encoding, "\xa4\xa2".dup.force_encoding("euc-jp"), Encoding::EUC_JP], + [Encoding, "\x82\xa0".dup.force_encoding("shift_jis"), Encoding::Shift_JIS], ].should be_computed_by(:compatible?, :abc) end end describe "Encoding.compatible? String, Encoding" do it "returns nil if the String's encoding is not ASCII compatible" do - Encoding.compatible?("abc".encode("utf-32le"), Encoding::US_ASCII).should be_nil + Encoding.compatible?("abc".encode("utf-32le"), Encoding::US_ASCII).should == nil end it "returns nil if the Encoding is not ASCII compatible" do - Encoding.compatible?("abc".encode("us-ascii"), Encoding::UTF_32LE).should be_nil + Encoding.compatible?("abc".encode("us-ascii"), Encoding::UTF_32LE).should == nil end it "returns the String's encoding if the Encoding is US-ASCII" do - [ [Encoding, "\xff", Encoding::ASCII_8BIT], + [ [Encoding, "\xff", Encoding::BINARY], [Encoding, "\u3042".encode("utf-8"), Encoding::UTF_8], - [Encoding, "\xa4\xa2".force_encoding("euc-jp"), Encoding::EUC_JP], - [Encoding, "\x82\xa0".force_encoding("shift_jis"), Encoding::Shift_JIS], + [Encoding, "\xa4\xa2".dup.force_encoding("euc-jp"), Encoding::EUC_JP], + [Encoding, "\x82\xa0".dup.force_encoding("shift_jis"), Encoding::Shift_JIS], ].should be_computed_by(:compatible?, Encoding::US_ASCII) end it "returns the Encoding if the String's encoding is ASCII compatible and the String is ASCII only" do str = "abc".encode("utf-8") - Encoding.compatible?(str, Encoding::ASCII_8BIT).should == Encoding::ASCII_8BIT + Encoding.compatible?(str, Encoding::BINARY).should == Encoding::BINARY Encoding.compatible?(str, Encoding::UTF_8).should == Encoding::UTF_8 Encoding.compatible?(str, Encoding::EUC_JP).should == Encoding::EUC_JP Encoding.compatible?(str, Encoding::Shift_JIS).should == Encoding::Shift_JIS end it "returns nil if the String's encoding is ASCII compatible but the string is not ASCII only" do - Encoding.compatible?("\u3042".encode("utf-8"), Encoding::ASCII_8BIT).should be_nil + Encoding.compatible?("\u3042".encode("utf-8"), Encoding::BINARY).should == nil end end describe "Encoding.compatible? Regexp, String" do it "returns US-ASCII if both are US-ASCII" do - str = "abc".force_encoding("us-ascii") + str = "abc".dup.force_encoding("us-ascii") Encoding.compatible?(/abc/, str).should == Encoding::US_ASCII end + it "returns the String's Encoding when the String is ASCII only with a different encoding" do + r = Regexp.new("\xa4\xa2".dup.force_encoding("euc-jp")) + Encoding.compatible?(r, "hello".dup.force_encoding("utf-8")).should == Encoding::UTF_8 + end + + it "returns the Regexp's Encoding if the String has the same non-ASCII encoding" do + r = Regexp.new("\xa4\xa2".dup.force_encoding("euc-jp")) + Encoding.compatible?(r, "hello".dup.force_encoding("euc-jp")).should == Encoding::EUC_JP + end end describe "Encoding.compatible? Regexp, Regexp" do @@ -254,10 +641,10 @@ describe "Encoding.compatible? Regexp, Regexp" do end it "returns the first's Encoding if it is not US-ASCII and not ASCII only" do - [ [Encoding, Regexp.new("\xff"), Encoding::ASCII_8BIT], + [ [Encoding, Regexp.new("\xff"), Encoding::BINARY], [Encoding, Regexp.new("\u3042".encode("utf-8")), Encoding::UTF_8], - [Encoding, Regexp.new("\xa4\xa2".force_encoding("euc-jp")), Encoding::EUC_JP], - [Encoding, Regexp.new("\x82\xa0".force_encoding("shift_jis")), Encoding::Shift_JIS], + [Encoding, Regexp.new("\xa4\xa2".dup.force_encoding("euc-jp")), Encoding::EUC_JP], + [Encoding, Regexp.new("\x82\xa0".dup.force_encoding("shift_jis")), Encoding::Shift_JIS], ].should be_computed_by(:compatible?, /abc/) end end @@ -268,17 +655,17 @@ describe "Encoding.compatible? Regexp, Symbol" do end it "returns the first's Encoding if it is not US-ASCII and not ASCII only" do - [ [Encoding, Regexp.new("\xff"), Encoding::ASCII_8BIT], + [ [Encoding, Regexp.new("\xff"), Encoding::BINARY], [Encoding, Regexp.new("\u3042".encode("utf-8")), Encoding::UTF_8], - [Encoding, Regexp.new("\xa4\xa2".force_encoding("euc-jp")), Encoding::EUC_JP], - [Encoding, Regexp.new("\x82\xa0".force_encoding("shift_jis")), Encoding::Shift_JIS], + [Encoding, Regexp.new("\xa4\xa2".dup.force_encoding("euc-jp")), Encoding::EUC_JP], + [Encoding, Regexp.new("\x82\xa0".dup.force_encoding("shift_jis")), Encoding::Shift_JIS], ].should be_computed_by(:compatible?, /abc/) end end describe "Encoding.compatible? Symbol, String" do it "returns US-ASCII if both are ASCII only" do - str = "abc".force_encoding("us-ascii") + str = "abc".dup.force_encoding("us-ascii") Encoding.compatible?(str, :abc).should == Encoding::US_ASCII end end @@ -291,10 +678,10 @@ describe "Encoding.compatible? Symbol, Regexp" do it "returns the Regexp's Encoding if it is not US-ASCII and not ASCII only" do a = Regexp.new("\xff") b = Regexp.new("\u3042".encode("utf-8")) - c = Regexp.new("\xa4\xa2".force_encoding("euc-jp")) - d = Regexp.new("\x82\xa0".force_encoding("shift_jis")) + c = Regexp.new("\xa4\xa2".dup.force_encoding("euc-jp")) + d = Regexp.new("\x82\xa0".dup.force_encoding("shift_jis")) - [ [Encoding, :abc, a, Encoding::ASCII_8BIT], + [ [Encoding, :abc, a, Encoding::BINARY], [Encoding, :abc, b, Encoding::UTF_8], [Encoding, :abc, c, Encoding::EUC_JP], [Encoding, :abc, d, Encoding::Shift_JIS], @@ -308,10 +695,10 @@ describe "Encoding.compatible? Symbol, Symbol" do end it "returns the first's Encoding if it is not ASCII only" do - [ [Encoding, "\xff".to_sym, Encoding::ASCII_8BIT], + [ [Encoding, "\xff".to_sym, Encoding::BINARY], [Encoding, "\u3042".encode("utf-8").to_sym, Encoding::UTF_8], - [Encoding, "\xa4\xa2".force_encoding("euc-jp").to_sym, Encoding::EUC_JP], - [Encoding, "\x82\xa0".force_encoding("shift_jis").to_sym, Encoding::Shift_JIS], + [Encoding, "\xa4\xa2".dup.force_encoding("euc-jp").to_sym, Encoding::EUC_JP], + [Encoding, "\x82\xa0".dup.force_encoding("shift_jis").to_sym, Encoding::Shift_JIS], ].should be_computed_by(:compatible?, :abc) end end @@ -322,15 +709,15 @@ describe "Encoding.compatible? Encoding, Encoding" do [Encoding, Encoding::US_ASCII, Encoding::UTF_7, nil], [Encoding, Encoding::EUC_JP, Encoding::UTF_7, nil], [Encoding, Encoding::UTF_7, Encoding::EUC_JP, nil], - [Encoding, Encoding::UTF_7, Encoding::ASCII_8BIT, nil], - [Encoding, Encoding::ASCII_8BIT, Encoding::UTF_7, nil], + [Encoding, Encoding::UTF_7, Encoding::BINARY, nil], + [Encoding, Encoding::BINARY, Encoding::UTF_7, nil], ].should be_computed_by(:compatible?) end it "returns nil if one of the encodings is not US-ASCII" do - [ [Encoding, Encoding::UTF_8, Encoding::ASCII_8BIT, nil], - [Encoding, Encoding::ASCII_8BIT, Encoding::UTF_8, nil], - [Encoding, Encoding::ASCII_8BIT, Encoding::EUC_JP, nil], + [ [Encoding, Encoding::UTF_8, Encoding::BINARY, nil], + [Encoding, Encoding::BINARY, Encoding::UTF_8, nil], + [Encoding, Encoding::BINARY, Encoding::EUC_JP, nil], [Encoding, Encoding::Shift_JIS, Encoding::EUC_JP, nil], ].should be_computed_by(:compatible?) end @@ -339,14 +726,14 @@ describe "Encoding.compatible? Encoding, Encoding" do [ [Encoding, Encoding::UTF_8, Encoding::US_ASCII, Encoding::UTF_8], [Encoding, Encoding::EUC_JP, Encoding::US_ASCII, Encoding::EUC_JP], [Encoding, Encoding::Shift_JIS, Encoding::US_ASCII, Encoding::Shift_JIS], - [Encoding, Encoding::ASCII_8BIT, Encoding::US_ASCII, Encoding::ASCII_8BIT], + [Encoding, Encoding::BINARY, Encoding::US_ASCII, Encoding::BINARY], ].should be_computed_by(:compatible?) end it "returns the Encoding if both are the same" do [ [Encoding, Encoding::UTF_8, Encoding::UTF_8, Encoding::UTF_8], [Encoding, Encoding::US_ASCII, Encoding::US_ASCII, Encoding::US_ASCII], - [Encoding, Encoding::ASCII_8BIT, Encoding::ASCII_8BIT, Encoding::ASCII_8BIT], + [Encoding, Encoding::BINARY, Encoding::BINARY, Encoding::BINARY], [Encoding, Encoding::UTF_7, Encoding::UTF_7, Encoding::UTF_7], ].should be_computed_by(:compatible?) end @@ -354,26 +741,32 @@ end describe "Encoding.compatible? Object, Object" do it "returns nil for Object, String" do - Encoding.compatible?(Object.new, "abc").should be_nil + Encoding.compatible?(Object.new, "abc").should == nil end it "returns nil for Object, Regexp" do - Encoding.compatible?(Object.new, /./).should be_nil + Encoding.compatible?(Object.new, /./).should == nil end it "returns nil for Object, Symbol" do - Encoding.compatible?(Object.new, :sym).should be_nil + Encoding.compatible?(Object.new, :sym).should == nil end it "returns nil for String, Object" do - Encoding.compatible?("abc", Object.new).should be_nil + Encoding.compatible?("abc", Object.new).should == nil end it "returns nil for Regexp, Object" do - Encoding.compatible?(/./, Object.new).should be_nil + Encoding.compatible?(/./, Object.new).should == nil end it "returns nil for Symbol, Object" do - Encoding.compatible?(:sym, Object.new).should be_nil + Encoding.compatible?(:sym, Object.new).should == nil + end +end + +describe "Encoding.compatible? nil, nil" do + it "returns nil" do + Encoding.compatible?(nil, nil).should == nil end end |
