diff options
Diffstat (limited to 'spec/ruby/core/encoding')
47 files changed, 2698 insertions, 0 deletions
diff --git a/spec/ruby/core/encoding/_dump_spec.rb b/spec/ruby/core/encoding/_dump_spec.rb new file mode 100644 index 0000000000..623fe88ec9 --- /dev/null +++ b/spec/ruby/core/encoding/_dump_spec.rb @@ -0,0 +1,5 @@ +require_relative '../../spec_helper' + +describe "Encoding#_dump" do + it "needs to be reviewed for spec completeness" +end diff --git a/spec/ruby/core/encoding/_load_spec.rb b/spec/ruby/core/encoding/_load_spec.rb new file mode 100644 index 0000000000..608098d34b --- /dev/null +++ b/spec/ruby/core/encoding/_load_spec.rb @@ -0,0 +1,5 @@ +require_relative '../../spec_helper' + +describe "Encoding._load" do + it "needs to be reviewed for spec completeness" +end diff --git a/spec/ruby/core/encoding/aliases_spec.rb b/spec/ruby/core/encoding/aliases_spec.rb new file mode 100644 index 0000000000..786157981a --- /dev/null +++ b/spec/ruby/core/encoding/aliases_spec.rb @@ -0,0 +1,43 @@ +require_relative '../../spec_helper' + +describe "Encoding.aliases" do + it "returns a Hash" do + Encoding.aliases.should be_an_instance_of(Hash) + end + + it "has Strings as keys" do + Encoding.aliases.keys.each do |key| + key.should be_an_instance_of(String) + end + end + + it "has Strings as values" do + Encoding.aliases.values.each do |value| + value.should be_an_instance_of(String) + end + end + + it "has alias names as its keys" do + Encoding.aliases.key?('BINARY').should be_true + Encoding.aliases.key?('ASCII').should be_true + end + + it "has the names of the aliased encoding as its values" do + Encoding.aliases['BINARY'].should == 'ASCII-8BIT' + Encoding.aliases['ASCII'].should == 'US-ASCII' + end + + it "has an 'external' key with the external default encoding as its value" do + Encoding.aliases['external'].should == Encoding.default_external.name + end + + it "has a 'locale' key and its value equals the name of the encoding found by the locale charmap" do + Encoding.aliases['locale'].should == Encoding.find(Encoding.locale_charmap).name + end + + it "only contains 
valid aliased encodings" do + Encoding.aliases.each do |aliased, original| + Encoding.find(aliased).should == Encoding.find(original) + end + end +end diff --git a/spec/ruby/core/encoding/ascii_compatible_spec.rb b/spec/ruby/core/encoding/ascii_compatible_spec.rb new file mode 100644 index 0000000000..4804300e85 --- /dev/null +++ b/spec/ruby/core/encoding/ascii_compatible_spec.rb @@ -0,0 +1,11 @@ +require_relative '../../spec_helper' + +describe "Encoding#ascii_compatible?" do + it "returns true if self represents an ASCII-compatible encoding" do + Encoding::UTF_8.ascii_compatible?.should be_true + end + + it "returns false if self does not represent an ASCII-compatible encoding" do + Encoding::UTF_16LE.ascii_compatible?.should be_false + end +end diff --git a/spec/ruby/core/encoding/compatible_spec.rb b/spec/ruby/core/encoding/compatible_spec.rb new file mode 100644 index 0000000000..31376a3b75 --- /dev/null +++ b/spec/ruby/core/encoding/compatible_spec.rb @@ -0,0 +1,758 @@ +# encoding: binary + +require_relative '../../spec_helper' + +# TODO: add IO + +describe "Encoding.compatible? 
String, String" do + describe "when the first's Encoding is valid US-ASCII" do + before :each do + @str = "abc".dup.force_encoding Encoding::US_ASCII + end + + it "returns US-ASCII when the second's is US-ASCII" do + Encoding.compatible?(@str, "def".encode("us-ascii")).should == Encoding::US_ASCII + end + + it "returns US-ASCII if the second String is BINARY and ASCII only" do + Encoding.compatible?(@str, "\x7f").should == Encoding::US_ASCII + end + + it "returns BINARY if the second String is BINARY but not ASCII only" do + Encoding.compatible?(@str, "\xff").should == Encoding::BINARY + end + + it "returns US-ASCII if the second String is UTF-8 and ASCII only" do + Encoding.compatible?(@str, "\x7f".encode("utf-8")).should == Encoding::US_ASCII + end + + it "returns UTF-8 if the second String is UTF-8 but not ASCII only" do + Encoding.compatible?(@str, "\u3042".encode("utf-8")).should == Encoding::UTF_8 + end + end + + describe "when the first's Encoding is ASCII compatible and ASCII only" do + it "returns the first's Encoding if the second is ASCII compatible and ASCII only" do + [ [Encoding, "abc".dup.force_encoding("UTF-8"), "123".dup.force_encoding("Shift_JIS"), Encoding::UTF_8], + [Encoding, "123".dup.force_encoding("Shift_JIS"), "abc".dup.force_encoding("UTF-8"), Encoding::Shift_JIS] + ].should be_computed_by(:compatible?) + end + + it "returns the first's Encoding if the second is ASCII compatible and ASCII only" do + [ [Encoding, "abc".dup.force_encoding("BINARY"), "123".dup.force_encoding("US-ASCII"), Encoding::BINARY], + [Encoding, "123".dup.force_encoding("US-ASCII"), "abc".dup.force_encoding("BINARY"), Encoding::US_ASCII] + ].should be_computed_by(:compatible?) 
+ end + + it "returns the second's Encoding if the second is ASCII compatible but not ASCII only" do + [ [Encoding, "abc".dup.force_encoding("UTF-8"), "\xff".dup.force_encoding("Shift_JIS"), Encoding::Shift_JIS], + [Encoding, "123".dup.force_encoding("Shift_JIS"), "\xff".dup.force_encoding("UTF-8"), Encoding::UTF_8], + [Encoding, "abc".dup.force_encoding("BINARY"), "\xff".dup.force_encoding("US-ASCII"), Encoding::US_ASCII], + [Encoding, "123".dup.force_encoding("US-ASCII"), "\xff".dup.force_encoding("BINARY"), Encoding::BINARY], + ].should be_computed_by(:compatible?) + end + + it "returns nil if the second's Encoding is not ASCII compatible" do + a = "abc".dup.force_encoding("UTF-8") + b = "1234".dup.force_encoding("UTF-16LE") + Encoding.compatible?(a, b).should be_nil + end + end + + describe "when the first's Encoding is ASCII compatible but not ASCII only" do + it "returns the first's Encoding if the second's is valid US-ASCII" do + Encoding.compatible?("\xff", "def".encode("us-ascii")).should == Encoding::BINARY + end + + it "returns the first's Encoding if the second's is UTF-8 and ASCII only" do + Encoding.compatible?("\xff", "\u{7f}".encode("utf-8")).should == Encoding::BINARY + end + + it "returns nil if the second encoding is ASCII compatible but neither String's encoding is ASCII only" do + Encoding.compatible?("\xff", "\u3042".encode("utf-8")).should be_nil + end + end + + describe "when the first's Encoding is not ASCII compatible" do + before :each do + @str = "abc".dup.force_encoding Encoding::UTF_7 + end + + it "returns nil when the second String is US-ASCII" do + Encoding.compatible?(@str, "def".encode("us-ascii")).should be_nil + end + + it "returns nil when the second String is BINARY and ASCII only" do + Encoding.compatible?(@str, "\x7f").should be_nil + end + + it "returns nil when the second String is BINARY but not ASCII only" do + Encoding.compatible?(@str, "\xff").should be_nil + end + + it "returns the Encoding when the second's Encoding 
is not ASCII compatible but the same as the first's Encoding" do + encoding = Encoding.compatible?(@str, "def".dup.force_encoding("utf-7")) + encoding.should == Encoding::UTF_7 + end + end + + describe "when the first's Encoding is invalid" do + before :each do + @str = "\xff".dup.force_encoding Encoding::UTF_8 + end + + it "returns the first's Encoding when the second's Encoding is US-ASCII" do + Encoding.compatible?(@str, "def".encode("us-ascii")).should == Encoding::UTF_8 + end + + it "returns the first's Encoding when the second String is ASCII only" do + Encoding.compatible?(@str, "\x7f").should == Encoding::UTF_8 + end + + it "returns nil when the second's Encoding is BINARY but not ASCII only" do + Encoding.compatible?(@str, "\xff").should be_nil + end + + it "returns nil when the second's Encoding is invalid and ASCII only" do + Encoding.compatible?(@str, "\x7f\x7f".dup.force_encoding("utf-16be")).should be_nil + end + + it "returns nil when the second's Encoding is invalid and not ASCII only" do + Encoding.compatible?(@str, "\xff\xff".dup.force_encoding("utf-16be")).should be_nil + end + + it "returns the Encoding when the second's Encoding is invalid but the same as the first" do + Encoding.compatible?(@str, @str).should == Encoding::UTF_8 + end + end + + describe "when the first String is empty and the second is not" do + describe "and the first's Encoding is ASCII compatible" do + before :each do + @str = "".dup.force_encoding("utf-8") + end + + it "returns the first's encoding when the second String is ASCII only" do + Encoding.compatible?(@str, "def".encode("us-ascii")).should == Encoding::UTF_8 + end + + it "returns the second's encoding when the second String is not ASCII only" do + Encoding.compatible?(@str, "def".encode("utf-32le")).should == Encoding::UTF_32LE + end + end + + describe "when the first's Encoding is not ASCII compatible" do + before :each do + @str = "".dup.force_encoding Encoding::UTF_7 + end + + it "returns the second string's 
encoding" do + Encoding.compatible?(@str, "def".encode("us-ascii")).should == Encoding::US_ASCII + end + end + end + + describe "when the second String is empty" do + before :each do + @str = "abc".dup.force_encoding("utf-7") + end + + it "returns the first Encoding" do + Encoding.compatible?(@str, "").should == Encoding::UTF_7 + end + end + + # Encoding negotiation depends on whether encodings are ASCII-compatible, empty + # and contain only ASCII characters (that take 7 bits). Check US-ASCII, UTF-8 and + # BINARY encodings (as most common) as well as an ASCII-compatible, a non-ASCII-compatible and a dummy + # encoding in all possible combinations. + describe "compatibility matrix" do + +# Use the following script to regenerate the matrix: +# +# ``` +# # encoding: binary +# +# ENCODINGS = [ +# "US-ASCII", +# "UTF-8", +# "ASCII-8BIT", +# "ISO-8859-1", # ASCII-compatible +# "UTF-16BE", # non-ASCII-compatible +# "ISO-2022-JP" # dummy +# ] +# +# TYPES = [:empty, :"7bits", :non7bits] +# +# VALUES = { +# empty: "", +# :"7bits" => "\x01\x01", +# non7bits: "\x01\x81" +# } +# +# ENCODINGS.product(TYPES, ENCODINGS, TYPES).each do |encoding1, type1, encoding2, type2| +# value1 = VALUES[type1].dup.force_encoding(encoding1) +# value2 = VALUES[type2].dup.force_encoding(encoding2) +# +# result_encoding = Encoding.compatible?(value1, value2) +# +# puts "[#{encoding1.inspect}, #{value1.inspect}, #{encoding2.inspect}, #{value2.inspect}, #{result_encoding&.name.inspect}]," +# end +# ``` + + matrix = [ + ["US-ASCII", "", "US-ASCII", "", "US-ASCII"], + ["US-ASCII", "", "US-ASCII", "\x01\x01", "US-ASCII"], + ["US-ASCII", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["US-ASCII", "", "UTF-8", "", "US-ASCII"], + ["US-ASCII", "", "UTF-8", "\u0001\u0001", "US-ASCII"], + ["US-ASCII", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["US-ASCII", "", "ASCII-8BIT", "", "US-ASCII"], + ["US-ASCII", "", "ASCII-8BIT", "\x01\x01", "US-ASCII"], + ["US-ASCII", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], +
["US-ASCII", "", "ISO-8859-1", "", "US-ASCII"], + ["US-ASCII", "", "ISO-8859-1", "\x01\x01", "US-ASCII"], + ["US-ASCII", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["US-ASCII", "", "UTF-16BE", "", "US-ASCII"], + ["US-ASCII", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["US-ASCII", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["US-ASCII", "", "ISO-2022-JP", "", "US-ASCII"], + ["US-ASCII", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["US-ASCII", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["US-ASCII", "\x01\x01", "US-ASCII", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "US-ASCII", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x01", "US-ASCII", "\x01\x81", "US-ASCII"], + ["US-ASCII", "\x01\x01", "UTF-8", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "UTF-8", "\u0001\u0001", "US-ASCII"], + ["US-ASCII", "\x01\x01", "UTF-8", "\u0001\x81", "UTF-8"], + ["US-ASCII", "\x01\x01", "ASCII-8BIT", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ASCII-8BIT", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["US-ASCII", "\x01\x01", "ISO-8859-1", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ISO-8859-1", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["US-ASCII", "\x01\x01", "UTF-16BE", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "UTF-16BE", "\u0101", nil], + ["US-ASCII", "\x01\x01", "UTF-16BE", "\u0181", nil], + ["US-ASCII", "\x01\x01", "ISO-2022-JP", "", "US-ASCII"], + ["US-ASCII", "\x01\x01", "ISO-2022-JP", "\x01\x01", nil], + ["US-ASCII", "\x01\x01", "ISO-2022-JP", "\x01\x81", nil], + ["US-ASCII", "\x01\x81", "US-ASCII", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "US-ASCII", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x81", "US-ASCII", "\x01\x81", "US-ASCII"], + ["US-ASCII", "\x01\x81", "UTF-8", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "UTF-8", "\u0001\u0001", "US-ASCII"], + ["US-ASCII", "\x01\x81", "UTF-8", "\u0001\x81", nil], + ["US-ASCII", "\x01\x81", "ASCII-8BIT", "", 
"US-ASCII"], + ["US-ASCII", "\x01\x81", "ASCII-8BIT", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x81", "ASCII-8BIT", "\x01\x81", nil], + ["US-ASCII", "\x01\x81", "ISO-8859-1", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "ISO-8859-1", "\x01\x01", "US-ASCII"], + ["US-ASCII", "\x01\x81", "ISO-8859-1", "\x01\x81", nil], + ["US-ASCII", "\x01\x81", "UTF-16BE", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "UTF-16BE", "\u0101", nil], + ["US-ASCII", "\x01\x81", "UTF-16BE", "\u0181", nil], + ["US-ASCII", "\x01\x81", "ISO-2022-JP", "", "US-ASCII"], + ["US-ASCII", "\x01\x81", "ISO-2022-JP", "\x01\x01", nil], + ["US-ASCII", "\x01\x81", "ISO-2022-JP", "\x01\x81", nil], + ["UTF-8", "", "US-ASCII", "", "UTF-8"], + ["UTF-8", "", "US-ASCII", "\x01\x01", "UTF-8"], + ["UTF-8", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["UTF-8", "", "UTF-8", "", "UTF-8"], + ["UTF-8", "", "UTF-8", "\u0001\u0001", "UTF-8"], + ["UTF-8", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["UTF-8", "", "ASCII-8BIT", "", "UTF-8"], + ["UTF-8", "", "ASCII-8BIT", "\x01\x01", "UTF-8"], + ["UTF-8", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["UTF-8", "", "ISO-8859-1", "", "UTF-8"], + ["UTF-8", "", "ISO-8859-1", "\x01\x01", "UTF-8"], + ["UTF-8", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["UTF-8", "", "UTF-16BE", "", "UTF-8"], + ["UTF-8", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["UTF-8", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["UTF-8", "", "ISO-2022-JP", "", "UTF-8"], + ["UTF-8", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["UTF-8", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["UTF-8", "\u0001\u0001", "US-ASCII", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "US-ASCII", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\u0001", "US-ASCII", "\x01\x81", "US-ASCII"], + ["UTF-8", "\u0001\u0001", "UTF-8", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "UTF-8", "\u0001\u0001", "UTF-8"], + ["UTF-8", "\u0001\u0001", "UTF-8", "\u0001\x81", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ASCII-8BIT", "", "UTF-8"], + ["UTF-8", 
"\u0001\u0001", "ASCII-8BIT", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["UTF-8", "\u0001\u0001", "ISO-8859-1", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ISO-8859-1", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["UTF-8", "\u0001\u0001", "UTF-16BE", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "UTF-16BE", "\u0101", nil], + ["UTF-8", "\u0001\u0001", "UTF-16BE", "\u0181", nil], + ["UTF-8", "\u0001\u0001", "ISO-2022-JP", "", "UTF-8"], + ["UTF-8", "\u0001\u0001", "ISO-2022-JP", "\x01\x01", nil], + ["UTF-8", "\u0001\u0001", "ISO-2022-JP", "\x01\x81", nil], + ["UTF-8", "\u0001\x81", "US-ASCII", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "US-ASCII", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\x81", "US-ASCII", "\x01\x81", nil], + ["UTF-8", "\u0001\x81", "UTF-8", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "UTF-8", "\u0001\u0001", "UTF-8"], + ["UTF-8", "\u0001\x81", "UTF-8", "\u0001\x81", "UTF-8"], + ["UTF-8", "\u0001\x81", "ASCII-8BIT", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "ASCII-8BIT", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\x81", "ASCII-8BIT", "\x01\x81", nil], + ["UTF-8", "\u0001\x81", "ISO-8859-1", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "ISO-8859-1", "\x01\x01", "UTF-8"], + ["UTF-8", "\u0001\x81", "ISO-8859-1", "\x01\x81", nil], + ["UTF-8", "\u0001\x81", "UTF-16BE", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "UTF-16BE", "\u0101", nil], + ["UTF-8", "\u0001\x81", "UTF-16BE", "\u0181", nil], + ["UTF-8", "\u0001\x81", "ISO-2022-JP", "", "UTF-8"], + ["UTF-8", "\u0001\x81", "ISO-2022-JP", "\x01\x01", nil], + ["UTF-8", "\u0001\x81", "ISO-2022-JP", "\x01\x81", nil], + ["ASCII-8BIT", "", "US-ASCII", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "US-ASCII", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ASCII-8BIT", "", "UTF-8", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "UTF-8", "\u0001\u0001", "ASCII-8BIT"], + ["ASCII-8BIT", "", "UTF-8", "\u0001\x81", 
"UTF-8"], + ["ASCII-8BIT", "", "ASCII-8BIT", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ISO-8859-1", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ISO-8859-1", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ASCII-8BIT", "", "UTF-16BE", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["ASCII-8BIT", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["ASCII-8BIT", "", "ISO-2022-JP", "", "ASCII-8BIT"], + ["ASCII-8BIT", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ASCII-8BIT", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["ASCII-8BIT", "\x01\x01", "US-ASCII", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "US-ASCII", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ASCII-8BIT", "\x01\x01", "UTF-8", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "UTF-8", "\u0001\u0001", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "UTF-8", "\u0001\x81", "UTF-8"], + ["ASCII-8BIT", "\x01\x01", "ASCII-8BIT", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ISO-8859-1", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ISO-8859-1", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ASCII-8BIT", "\x01\x01", "UTF-16BE", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "UTF-16BE", "\u0101", nil], + ["ASCII-8BIT", "\x01\x01", "UTF-16BE", "\u0181", nil], + ["ASCII-8BIT", "\x01\x01", "ISO-2022-JP", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x01", "ISO-2022-JP", "\x01\x01", nil], + ["ASCII-8BIT", "\x01\x01", "ISO-2022-JP", "\x01\x81", nil], + ["ASCII-8BIT", "\x01\x81", "US-ASCII", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "US-ASCII", "\x01\x01", "ASCII-8BIT"], + 
["ASCII-8BIT", "\x01\x81", "US-ASCII", "\x01\x81", nil], + ["ASCII-8BIT", "\x01\x81", "UTF-8", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "UTF-8", "\u0001\u0001", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "UTF-8", "\u0001\x81", nil], + ["ASCII-8BIT", "\x01\x81", "ASCII-8BIT", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ISO-8859-1", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ISO-8859-1", "\x01\x01", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ISO-8859-1", "\x01\x81", nil], + ["ASCII-8BIT", "\x01\x81", "UTF-16BE", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "UTF-16BE", "\u0101", nil], + ["ASCII-8BIT", "\x01\x81", "UTF-16BE", "\u0181", nil], + ["ASCII-8BIT", "\x01\x81", "ISO-2022-JP", "", "ASCII-8BIT"], + ["ASCII-8BIT", "\x01\x81", "ISO-2022-JP", "\x01\x01", nil], + ["ASCII-8BIT", "\x01\x81", "ISO-2022-JP", "\x01\x81", nil], + ["ISO-8859-1", "", "US-ASCII", "", "ISO-8859-1"], + ["ISO-8859-1", "", "US-ASCII", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ISO-8859-1", "", "UTF-8", "", "ISO-8859-1"], + ["ISO-8859-1", "", "UTF-8", "\u0001\u0001", "ISO-8859-1"], + ["ISO-8859-1", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["ISO-8859-1", "", "ASCII-8BIT", "", "ISO-8859-1"], + ["ISO-8859-1", "", "ASCII-8BIT", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ISO-8859-1", "", "ISO-8859-1", "", "ISO-8859-1"], + ["ISO-8859-1", "", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ISO-8859-1", "", "UTF-16BE", "", "ISO-8859-1"], + ["ISO-8859-1", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["ISO-8859-1", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["ISO-8859-1", "", "ISO-2022-JP", "", "ISO-8859-1"], + ["ISO-8859-1", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ISO-8859-1", "", 
"ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["ISO-8859-1", "\x01\x01", "US-ASCII", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "US-ASCII", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ISO-8859-1", "\x01\x01", "UTF-8", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "UTF-8", "\u0001\u0001", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "UTF-8", "\u0001\x81", "UTF-8"], + ["ISO-8859-1", "\x01\x01", "ASCII-8BIT", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ASCII-8BIT", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ISO-8859-1", "\x01\x01", "ISO-8859-1", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "UTF-16BE", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "UTF-16BE", "\u0101", nil], + ["ISO-8859-1", "\x01\x01", "UTF-16BE", "\u0181", nil], + ["ISO-8859-1", "\x01\x01", "ISO-2022-JP", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x01", "ISO-2022-JP", "\x01\x01", nil], + ["ISO-8859-1", "\x01\x01", "ISO-2022-JP", "\x01\x81", nil], + ["ISO-8859-1", "\x01\x81", "US-ASCII", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "US-ASCII", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "US-ASCII", "\x01\x81", nil], + ["ISO-8859-1", "\x01\x81", "UTF-8", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "UTF-8", "\u0001\u0001", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "UTF-8", "\u0001\x81", nil], + ["ISO-8859-1", "\x01\x81", "ASCII-8BIT", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ASCII-8BIT", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ASCII-8BIT", "\x01\x81", nil], + ["ISO-8859-1", "\x01\x81", "ISO-8859-1", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "UTF-16BE", 
"", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "UTF-16BE", "\u0101", nil], + ["ISO-8859-1", "\x01\x81", "UTF-16BE", "\u0181", nil], + ["ISO-8859-1", "\x01\x81", "ISO-2022-JP", "", "ISO-8859-1"], + ["ISO-8859-1", "\x01\x81", "ISO-2022-JP", "\x01\x01", nil], + ["ISO-8859-1", "\x01\x81", "ISO-2022-JP", "\x01\x81", nil], + ["UTF-16BE", "", "US-ASCII", "", "UTF-16BE"], + ["UTF-16BE", "", "US-ASCII", "\x01\x01", "US-ASCII"], + ["UTF-16BE", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["UTF-16BE", "", "UTF-8", "", "UTF-16BE"], + ["UTF-16BE", "", "UTF-8", "\u0001\u0001", "UTF-8"], + ["UTF-16BE", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["UTF-16BE", "", "ASCII-8BIT", "", "UTF-16BE"], + ["UTF-16BE", "", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["UTF-16BE", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["UTF-16BE", "", "ISO-8859-1", "", "UTF-16BE"], + ["UTF-16BE", "", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["UTF-16BE", "", "ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["UTF-16BE", "", "UTF-16BE", "", "UTF-16BE"], + ["UTF-16BE", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["UTF-16BE", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["UTF-16BE", "", "ISO-2022-JP", "", "UTF-16BE"], + ["UTF-16BE", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["UTF-16BE", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["UTF-16BE", "\u0101", "US-ASCII", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "US-ASCII", "\x01\x01", nil], + ["UTF-16BE", "\u0101", "US-ASCII", "\x01\x81", nil], + ["UTF-16BE", "\u0101", "UTF-8", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "UTF-8", "\u0001\u0001", nil], + ["UTF-16BE", "\u0101", "UTF-8", "\u0001\x81", nil], + ["UTF-16BE", "\u0101", "ASCII-8BIT", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "ASCII-8BIT", "\x01\x01", nil], + ["UTF-16BE", "\u0101", "ASCII-8BIT", "\x01\x81", nil], + ["UTF-16BE", "\u0101", "ISO-8859-1", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "ISO-8859-1", "\x01\x01", nil], + ["UTF-16BE", "\u0101", "ISO-8859-1", "\x01\x81", nil], + ["UTF-16BE", "\u0101", 
"UTF-16BE", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "UTF-16BE", "\u0101", "UTF-16BE"], + ["UTF-16BE", "\u0101", "UTF-16BE", "\u0181", "UTF-16BE"], + ["UTF-16BE", "\u0101", "ISO-2022-JP", "", "UTF-16BE"], + ["UTF-16BE", "\u0101", "ISO-2022-JP", "\x01\x01", nil], + ["UTF-16BE", "\u0101", "ISO-2022-JP", "\x01\x81", nil], + ["UTF-16BE", "\u0181", "US-ASCII", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "US-ASCII", "\x01\x01", nil], + ["UTF-16BE", "\u0181", "US-ASCII", "\x01\x81", nil], + ["UTF-16BE", "\u0181", "UTF-8", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "UTF-8", "\u0001\u0001", nil], + ["UTF-16BE", "\u0181", "UTF-8", "\u0001\x81", nil], + ["UTF-16BE", "\u0181", "ASCII-8BIT", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "ASCII-8BIT", "\x01\x01", nil], + ["UTF-16BE", "\u0181", "ASCII-8BIT", "\x01\x81", nil], + ["UTF-16BE", "\u0181", "ISO-8859-1", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "ISO-8859-1", "\x01\x01", nil], + ["UTF-16BE", "\u0181", "ISO-8859-1", "\x01\x81", nil], + ["UTF-16BE", "\u0181", "UTF-16BE", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "UTF-16BE", "\u0101", "UTF-16BE"], + ["UTF-16BE", "\u0181", "UTF-16BE", "\u0181", "UTF-16BE"], + ["UTF-16BE", "\u0181", "ISO-2022-JP", "", "UTF-16BE"], + ["UTF-16BE", "\u0181", "ISO-2022-JP", "\x01\x01", nil], + ["UTF-16BE", "\u0181", "ISO-2022-JP", "\x01\x81", nil], + ["ISO-2022-JP", "", "US-ASCII", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "US-ASCII", "\x01\x01", "US-ASCII"], + ["ISO-2022-JP", "", "US-ASCII", "\x01\x81", "US-ASCII"], + ["ISO-2022-JP", "", "UTF-8", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "UTF-8", "\u0001\u0001", "UTF-8"], + ["ISO-2022-JP", "", "UTF-8", "\u0001\x81", "UTF-8"], + ["ISO-2022-JP", "", "ASCII-8BIT", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "ASCII-8BIT", "\x01\x01", "ASCII-8BIT"], + ["ISO-2022-JP", "", "ASCII-8BIT", "\x01\x81", "ASCII-8BIT"], + ["ISO-2022-JP", "", "ISO-8859-1", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "ISO-8859-1", "\x01\x01", "ISO-8859-1"], + ["ISO-2022-JP", "", 
"ISO-8859-1", "\x01\x81", "ISO-8859-1"], + ["ISO-2022-JP", "", "UTF-16BE", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "UTF-16BE", "\u0101", "UTF-16BE"], + ["ISO-2022-JP", "", "UTF-16BE", "\u0181", "UTF-16BE"], + ["ISO-2022-JP", "", "ISO-2022-JP", "", "ISO-2022-JP"], + ["ISO-2022-JP", "", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ISO-2022-JP", "", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "US-ASCII", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "US-ASCII", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x01", "US-ASCII", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x01", "UTF-8", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "UTF-8", "\u0001\u0001", nil], + ["ISO-2022-JP", "\x01\x01", "UTF-8", "\u0001\x81", nil], + ["ISO-2022-JP", "\x01\x01", "ASCII-8BIT", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "ASCII-8BIT", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x01", "ASCII-8BIT", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x01", "ISO-8859-1", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "ISO-8859-1", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x01", "ISO-8859-1", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x01", "UTF-16BE", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "UTF-16BE", "\u0101", nil], + ["ISO-2022-JP", "\x01\x01", "UTF-16BE", "\u0181", nil], + ["ISO-2022-JP", "\x01\x01", "ISO-2022-JP", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x01", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "US-ASCII", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "US-ASCII", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x81", "US-ASCII", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x81", "UTF-8", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "UTF-8", "\u0001\u0001", nil], + ["ISO-2022-JP", "\x01\x81", "UTF-8", "\u0001\x81", nil], + ["ISO-2022-JP", "\x01\x81", "ASCII-8BIT", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "ASCII-8BIT", 
"\x01\x01", nil], + ["ISO-2022-JP", "\x01\x81", "ASCII-8BIT", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x81", "ISO-8859-1", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "ISO-8859-1", "\x01\x01", nil], + ["ISO-2022-JP", "\x01\x81", "ISO-8859-1", "\x01\x81", nil], + ["ISO-2022-JP", "\x01\x81", "UTF-16BE", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "UTF-16BE", "\u0101", nil], + ["ISO-2022-JP", "\x01\x81", "UTF-16BE", "\u0181", nil], + ["ISO-2022-JP", "\x01\x81", "ISO-2022-JP", "", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "ISO-2022-JP", "\x01\x01", "ISO-2022-JP"], + ["ISO-2022-JP", "\x01\x81", "ISO-2022-JP", "\x01\x81", "ISO-2022-JP"], + ] + + matrix.each do |encoding1, value1, encoding2, value2, compatible_encoding| + it "returns #{compatible_encoding} for #{value1.inspect} in #{encoding1} and #{value2.inspect} in #{encoding2}" do + actual_encoding = Encoding.compatible?(value1.dup.force_encoding(encoding1), value2.dup.force_encoding(encoding2)) + actual_encoding&.name.should == compatible_encoding + end + end + end +end + +describe "Encoding.compatible? 
String, Regexp" do + it "returns US-ASCII if both are US-ASCII" do + str = "abc".dup.force_encoding("us-ascii") + Encoding.compatible?(str, /abc/).should == Encoding::US_ASCII + end + + it "returns the String's Encoding if it is not US-ASCII but both are ASCII only" do + [ [Encoding, "abc", Encoding::BINARY], + [Encoding, "abc".encode("utf-8"), Encoding::UTF_8], + [Encoding, "abc".encode("euc-jp"), Encoding::EUC_JP], + [Encoding, "abc".encode("shift_jis"), Encoding::Shift_JIS], + ].should be_computed_by(:compatible?, /abc/) + end + + it "returns the String's Encoding if the String is not ASCII only" do + [ [Encoding, "\xff", Encoding::BINARY], + [Encoding, "\u3042".encode("utf-8"), Encoding::UTF_8], + [Encoding, "\xa4\xa2".dup.force_encoding("euc-jp"), Encoding::EUC_JP], + [Encoding, "\x82\xa0".dup.force_encoding("shift_jis"), Encoding::Shift_JIS], + ].should be_computed_by(:compatible?, /abc/) + end +end + +describe "Encoding.compatible? String, Symbol" do + it "returns US-ASCII if both are ASCII only" do + str = "abc".dup.force_encoding("us-ascii") + Encoding.compatible?(str, :abc).should == Encoding::US_ASCII + end + + it "returns the String's Encoding if it is not US-ASCII but both are ASCII only" do + [ [Encoding, "abc", Encoding::BINARY], + [Encoding, "abc".encode("utf-8"), Encoding::UTF_8], + [Encoding, "abc".encode("euc-jp"), Encoding::EUC_JP], + [Encoding, "abc".encode("shift_jis"), Encoding::Shift_JIS], + ].should be_computed_by(:compatible?, :abc) + end + + it "returns the String's Encoding if the String is not ASCII only" do + [ [Encoding, "\xff", Encoding::BINARY], + [Encoding, "\u3042".encode("utf-8"), Encoding::UTF_8], + [Encoding, "\xa4\xa2".dup.force_encoding("euc-jp"), Encoding::EUC_JP], + [Encoding, "\x82\xa0".dup.force_encoding("shift_jis"), Encoding::Shift_JIS], + ].should be_computed_by(:compatible?, :abc) + end +end + +describe "Encoding.compatible? 
String, Encoding" do + it "returns nil if the String's encoding is not ASCII compatible" do + Encoding.compatible?("abc".encode("utf-32le"), Encoding::US_ASCII).should be_nil + end + + it "returns nil if the Encoding is not ASCII compatible" do + Encoding.compatible?("abc".encode("us-ascii"), Encoding::UTF_32LE).should be_nil + end + + it "returns the String's encoding if the Encoding is US-ASCII" do + [ [Encoding, "\xff", Encoding::BINARY], + [Encoding, "\u3042".encode("utf-8"), Encoding::UTF_8], + [Encoding, "\xa4\xa2".dup.force_encoding("euc-jp"), Encoding::EUC_JP], + [Encoding, "\x82\xa0".dup.force_encoding("shift_jis"), Encoding::Shift_JIS], + ].should be_computed_by(:compatible?, Encoding::US_ASCII) + end + + it "returns the Encoding if the String's encoding is ASCII compatible and the String is ASCII only" do + str = "abc".encode("utf-8") + + Encoding.compatible?(str, Encoding::BINARY).should == Encoding::BINARY + Encoding.compatible?(str, Encoding::UTF_8).should == Encoding::UTF_8 + Encoding.compatible?(str, Encoding::EUC_JP).should == Encoding::EUC_JP + Encoding.compatible?(str, Encoding::Shift_JIS).should == Encoding::Shift_JIS + end + + it "returns nil if the String's encoding is ASCII compatible but the string is not ASCII only" do + Encoding.compatible?("\u3042".encode("utf-8"), Encoding::BINARY).should be_nil + end +end + +describe "Encoding.compatible? Regexp, String" do + it "returns US-ASCII if both are US-ASCII" do + str = "abc".dup.force_encoding("us-ascii") + Encoding.compatible?(/abc/, str).should == Encoding::US_ASCII + end + +end + +describe "Encoding.compatible? 
Regexp, Regexp" do + it "returns US-ASCII if both are US-ASCII" do + Encoding.compatible?(/abc/, /def/).should == Encoding::US_ASCII + end + + it "returns the first's Encoding if it is not US-ASCII and not ASCII only" do + [ [Encoding, Regexp.new("\xff"), Encoding::BINARY], + [Encoding, Regexp.new("\u3042".encode("utf-8")), Encoding::UTF_8], + [Encoding, Regexp.new("\xa4\xa2".dup.force_encoding("euc-jp")), Encoding::EUC_JP], + [Encoding, Regexp.new("\x82\xa0".dup.force_encoding("shift_jis")), Encoding::Shift_JIS], + ].should be_computed_by(:compatible?, /abc/) + end +end + +describe "Encoding.compatible? Regexp, Symbol" do + it "returns US-ASCII if both are US-ASCII" do + Encoding.compatible?(/abc/, :def).should == Encoding::US_ASCII + end + + it "returns the first's Encoding if it is not US-ASCII and not ASCII only" do + [ [Encoding, Regexp.new("\xff"), Encoding::BINARY], + [Encoding, Regexp.new("\u3042".encode("utf-8")), Encoding::UTF_8], + [Encoding, Regexp.new("\xa4\xa2".dup.force_encoding("euc-jp")), Encoding::EUC_JP], + [Encoding, Regexp.new("\x82\xa0".dup.force_encoding("shift_jis")), Encoding::Shift_JIS], + ].should be_computed_by(:compatible?, /abc/) + end +end + +describe "Encoding.compatible? Symbol, String" do + it "returns US-ASCII if both are ASCII only" do + str = "abc".dup.force_encoding("us-ascii") + Encoding.compatible?(str, :abc).should == Encoding::US_ASCII + end +end + +describe "Encoding.compatible? 
Symbol, Regexp" do + it "returns US-ASCII if both are US-ASCII" do + Encoding.compatible?(:abc, /def/).should == Encoding::US_ASCII + end + + it "returns the Regexp's Encoding if it is not US-ASCII and not ASCII only" do + a = Regexp.new("\xff") + b = Regexp.new("\u3042".encode("utf-8")) + c = Regexp.new("\xa4\xa2".dup.force_encoding("euc-jp")) + d = Regexp.new("\x82\xa0".dup.force_encoding("shift_jis")) + + [ [Encoding, :abc, a, Encoding::BINARY], + [Encoding, :abc, b, Encoding::UTF_8], + [Encoding, :abc, c, Encoding::EUC_JP], + [Encoding, :abc, d, Encoding::Shift_JIS], + ].should be_computed_by(:compatible?) + end +end + +describe "Encoding.compatible? Symbol, Symbol" do + it "returns US-ASCII if both are US-ASCII" do + Encoding.compatible?(:abc, :def).should == Encoding::US_ASCII + end + + it "returns the first's Encoding if it is not ASCII only" do + [ [Encoding, "\xff".to_sym, Encoding::BINARY], + [Encoding, "\u3042".encode("utf-8").to_sym, Encoding::UTF_8], + [Encoding, "\xa4\xa2".dup.force_encoding("euc-jp").to_sym, Encoding::EUC_JP], + [Encoding, "\x82\xa0".dup.force_encoding("shift_jis").to_sym, Encoding::Shift_JIS], + ].should be_computed_by(:compatible?, :abc) + end +end + +describe "Encoding.compatible? Encoding, Encoding" do + it "returns nil if one of the encodings is a dummy encoding" do + [ [Encoding, Encoding::UTF_7, Encoding::US_ASCII, nil], + [Encoding, Encoding::US_ASCII, Encoding::UTF_7, nil], + [Encoding, Encoding::EUC_JP, Encoding::UTF_7, nil], + [Encoding, Encoding::UTF_7, Encoding::EUC_JP, nil], + [Encoding, Encoding::UTF_7, Encoding::BINARY, nil], + [Encoding, Encoding::BINARY, Encoding::UTF_7, nil], + ].should be_computed_by(:compatible?) 
+ end + + it "returns nil if one of the encodings is not US-ASCII" do + [ [Encoding, Encoding::UTF_8, Encoding::BINARY, nil], + [Encoding, Encoding::BINARY, Encoding::UTF_8, nil], + [Encoding, Encoding::BINARY, Encoding::EUC_JP, nil], + [Encoding, Encoding::Shift_JIS, Encoding::EUC_JP, nil], + ].should be_computed_by(:compatible?) + end + + it "returns the first if the second is US-ASCII" do + [ [Encoding, Encoding::UTF_8, Encoding::US_ASCII, Encoding::UTF_8], + [Encoding, Encoding::EUC_JP, Encoding::US_ASCII, Encoding::EUC_JP], + [Encoding, Encoding::Shift_JIS, Encoding::US_ASCII, Encoding::Shift_JIS], + [Encoding, Encoding::BINARY, Encoding::US_ASCII, Encoding::BINARY], + ].should be_computed_by(:compatible?) + end + + it "returns the Encoding if both are the same" do + [ [Encoding, Encoding::UTF_8, Encoding::UTF_8, Encoding::UTF_8], + [Encoding, Encoding::US_ASCII, Encoding::US_ASCII, Encoding::US_ASCII], + [Encoding, Encoding::BINARY, Encoding::BINARY, Encoding::BINARY], + [Encoding, Encoding::UTF_7, Encoding::UTF_7, Encoding::UTF_7], + ].should be_computed_by(:compatible?) + end +end + +describe "Encoding.compatible? Object, Object" do + it "returns nil for Object, String" do + Encoding.compatible?(Object.new, "abc").should be_nil + end + + it "returns nil for Object, Regexp" do + Encoding.compatible?(Object.new, /./).should be_nil + end + + it "returns nil for Object, Symbol" do + Encoding.compatible?(Object.new, :sym).should be_nil + end + + it "returns nil for String, Object" do + Encoding.compatible?("abc", Object.new).should be_nil + end + + it "returns nil for Regexp, Object" do + Encoding.compatible?(/./, Object.new).should be_nil + end + + it "returns nil for Symbol, Object" do + Encoding.compatible?(:sym, Object.new).should be_nil + end +end + +describe "Encoding.compatible? 
nil, nil" do + it "returns nil" do + Encoding.compatible?(nil, nil).should be_nil + end +end diff --git a/spec/ruby/core/encoding/converter/asciicompat_encoding_spec.rb b/spec/ruby/core/encoding/converter/asciicompat_encoding_spec.rb new file mode 100644 index 0000000000..1beb40af3f --- /dev/null +++ b/spec/ruby/core/encoding/converter/asciicompat_encoding_spec.rb @@ -0,0 +1,37 @@ +require_relative '../../../spec_helper' + +describe "Encoding::Converter.asciicompat_encoding" do + it "accepts an encoding name as a String argument" do + -> { Encoding::Converter.asciicompat_encoding('UTF-8') }. + should_not raise_error + end + + it "coerces non-String/Encoding objects with #to_str" do + str = mock('string') + str.should_receive(:to_str).at_least(1).times.and_return('string') + Encoding::Converter.asciicompat_encoding(str) + end + + it "accepts an Encoding object as an argument" do + Encoding::Converter. + asciicompat_encoding(Encoding.find("ISO-2022-JP")). + should == Encoding::Converter.asciicompat_encoding("ISO-2022-JP") + end + + it "returns a corresponding ASCII compatible encoding for ASCII-incompatible encodings" do + Encoding::Converter.asciicompat_encoding('UTF-16BE').should == Encoding::UTF_8 + Encoding::Converter.asciicompat_encoding("ISO-2022-JP").should == Encoding.find("stateless-ISO-2022-JP") + end + + it "returns nil when the given encoding is ASCII compatible" do + Encoding::Converter.asciicompat_encoding('ASCII').should be_nil + Encoding::Converter.asciicompat_encoding('UTF-8').should be_nil + end + + it "handles encoding names who resolve to nil encodings" do + internal = Encoding.default_internal + Encoding.default_internal = nil + Encoding::Converter.asciicompat_encoding('internal').should be_nil + Encoding.default_internal = internal + end +end diff --git a/spec/ruby/core/encoding/converter/constants_spec.rb b/spec/ruby/core/encoding/converter/constants_spec.rb new file mode 100644 index 0000000000..7d29bdb278 --- /dev/null +++ 
b/spec/ruby/core/encoding/converter/constants_spec.rb @@ -0,0 +1,131 @@ +require_relative '../../../spec_helper' + +describe "Encoding::Converter::INVALID_MASK" do + it "exists" do + Encoding::Converter.should have_constant(:INVALID_MASK) + end + + it "has an Integer value" do + Encoding::Converter::INVALID_MASK.should be_an_instance_of(Integer) + end +end + +describe "Encoding::Converter::INVALID_REPLACE" do + it "exists" do + Encoding::Converter.should have_constant(:INVALID_REPLACE) + end + + it "has an Integer value" do + Encoding::Converter::INVALID_REPLACE.should be_an_instance_of(Integer) + end +end + +describe "Encoding::Converter::UNDEF_MASK" do + it "exists" do + Encoding::Converter.should have_constant(:UNDEF_MASK) + end + + it "has an Integer value" do + Encoding::Converter::UNDEF_MASK.should be_an_instance_of(Integer) + end +end + +describe "Encoding::Converter::UNDEF_REPLACE" do + it "exists" do + Encoding::Converter.should have_constant(:UNDEF_REPLACE) + end + + it "has an Integer value" do + Encoding::Converter::UNDEF_REPLACE.should be_an_instance_of(Integer) + end +end + +describe "Encoding::Converter::UNDEF_HEX_CHARREF" do + it "exists" do + Encoding::Converter.should have_constant(:UNDEF_HEX_CHARREF) + end + + it "has an Integer value" do + Encoding::Converter::UNDEF_HEX_CHARREF.should be_an_instance_of(Integer) + end +end + +describe "Encoding::Converter::PARTIAL_INPUT" do + it "exists" do + Encoding::Converter.should have_constant(:PARTIAL_INPUT) + end + + it "has an Integer value" do + Encoding::Converter::PARTIAL_INPUT.should be_an_instance_of(Integer) + end +end + +describe "Encoding::Converter::AFTER_OUTPUT" do + it "exists" do + Encoding::Converter.should have_constant(:AFTER_OUTPUT) + end + + it "has an Integer value" do + Encoding::Converter::AFTER_OUTPUT.should be_an_instance_of(Integer) + end +end + +describe "Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR" do + it "exists" do + Encoding::Converter.should 
have_constant(:UNIVERSAL_NEWLINE_DECORATOR) + end + + it "has an Integer value" do + Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR.should be_an_instance_of(Integer) + end +end + +describe "Encoding::Converter::CRLF_NEWLINE_DECORATOR" do + it "exists" do + Encoding::Converter.should have_constant(:CRLF_NEWLINE_DECORATOR) + end + + it "has an Integer value" do + Encoding::Converter::CRLF_NEWLINE_DECORATOR.should be_an_instance_of(Integer) + end +end + +describe "Encoding::Converter::CR_NEWLINE_DECORATOR" do + it "exists" do + Encoding::Converter.should have_constant(:CR_NEWLINE_DECORATOR) + end + + it "has an Integer value" do + Encoding::Converter::CR_NEWLINE_DECORATOR.should be_an_instance_of(Integer) + end +end + +describe "Encoding::Converter::XML_TEXT_DECORATOR" do + it "exists" do + Encoding::Converter.should have_constant(:XML_TEXT_DECORATOR) + end + + it "has an Integer value" do + Encoding::Converter::XML_TEXT_DECORATOR.should be_an_instance_of(Integer) + end +end + +describe "Encoding::Converter::XML_ATTR_CONTENT_DECORATOR" do + it "exists" do + Encoding::Converter.should have_constant(:XML_ATTR_CONTENT_DECORATOR) + end + + it "has an Integer value" do + Encoding::Converter::XML_ATTR_CONTENT_DECORATOR.should be_an_instance_of(Integer) + end +end + +describe "Encoding::Converter::XML_ATTR_QUOTE_DECORATOR" do + it "exists" do + Encoding::Converter.should have_constant(:XML_ATTR_QUOTE_DECORATOR) + end + + it "has an Integer value" do + Encoding::Converter::XML_ATTR_QUOTE_DECORATOR.should be_an_instance_of(Integer) + end +end diff --git a/spec/ruby/core/encoding/converter/convert_spec.rb b/spec/ruby/core/encoding/converter/convert_spec.rb new file mode 100644 index 0000000000..8533af4565 --- /dev/null +++ b/spec/ruby/core/encoding/converter/convert_spec.rb @@ -0,0 +1,46 @@ +# encoding: binary +# frozen_string_literal: true +require_relative '../../../spec_helper' + +describe "Encoding::Converter#convert" do + it "returns a String" do + ec = 
Encoding::Converter.new('ascii', 'utf-8') + ec.convert('glark').should be_an_instance_of(String) + end + + it "sets the encoding of the result to the target encoding" do + ec = Encoding::Converter.new('ascii', 'utf-8') + str = 'glark'.dup.force_encoding('ascii') + ec.convert(str).encoding.should == Encoding::UTF_8 + end + + it "transcodes the given String to the target encoding" do + ec = Encoding::Converter.new("utf-8", "euc-jp") + ec.convert("\u3042".dup.force_encoding('UTF-8')).should == \ + "\xA4\xA2".dup.force_encoding('EUC-JP') + end + + it "allows Strings of different encodings to the source encoding" do + ec = Encoding::Converter.new('ascii', 'utf-8') + str = 'glark'.dup.force_encoding('SJIS') + ec.convert(str).encoding.should == Encoding::UTF_8 + end + + it "reuses the given encoding pair if called multiple times" do + ec = Encoding::Converter.new('ascii', 'SJIS') + ec.convert('a'.dup.force_encoding('ASCII')).should == 'a'.dup.force_encoding('SJIS') + ec.convert('b'.dup.force_encoding('ASCII')).should == 'b'.dup.force_encoding('SJIS') + end + + it "raises UndefinedConversionError if the String contains characters invalid for the target encoding" do + ec = Encoding::Converter.new('UTF-8', Encoding.find('macCyrillic')) + -> { ec.convert("\u{6543}".dup.force_encoding('UTF-8')) }.should \ + raise_error(Encoding::UndefinedConversionError) + end + + it "raises an ArgumentError if called on a finished stream" do + ec = Encoding::Converter.new('UTF-8', Encoding.find('macCyrillic')) + ec.finish + -> { ec.convert("\u{65}") }.should raise_error(ArgumentError) + end +end diff --git a/spec/ruby/core/encoding/converter/convpath_spec.rb b/spec/ruby/core/encoding/converter/convpath_spec.rb new file mode 100644 index 0000000000..23f1e5dc33 --- /dev/null +++ b/spec/ruby/core/encoding/converter/convpath_spec.rb @@ -0,0 +1,24 @@ +require_relative '../../../spec_helper' + +describe "Encoding::Converter#convpath" do + it "returns an Array with a single element if there is a 
direct converter" do + cp = Encoding::Converter.new('ASCII', 'UTF-8').convpath + cp.should == [[Encoding::US_ASCII, Encoding::UTF_8]] + end + + it "returns multiple encoding pairs when direct conversion is impossible" do + cp = Encoding::Converter.new('ascii','Big5').convpath + cp.should == [ + [Encoding::US_ASCII, Encoding::UTF_8], + [Encoding::UTF_8, Encoding::Big5] + ] + end + + it "indicates if crlf_newline conversion would occur" do + ec = Encoding::Converter.new("ISo-8859-1", "EUC-JP", crlf_newline: true) + ec.convpath.last.should == "crlf_newline" + + ec = Encoding::Converter.new("ASCII", "UTF-8", crlf_newline: false) + ec.convpath.last.should_not == "crlf_newline" + end +end diff --git a/spec/ruby/core/encoding/converter/destination_encoding_spec.rb b/spec/ruby/core/encoding/converter/destination_encoding_spec.rb new file mode 100644 index 0000000000..481a857909 --- /dev/null +++ b/spec/ruby/core/encoding/converter/destination_encoding_spec.rb @@ -0,0 +1,11 @@ +require_relative '../../../spec_helper' + +describe "Encoding::Converter#destination_encoding" do + it "returns the destination encoding as an Encoding object" do + ec = Encoding::Converter.new('ASCII','Big5') + ec.destination_encoding.should == Encoding::BIG5 + + ec = Encoding::Converter.new('SJIS','EUC-JP') + ec.destination_encoding.should == Encoding::EUC_JP + end +end diff --git a/spec/ruby/core/encoding/converter/finish_spec.rb b/spec/ruby/core/encoding/converter/finish_spec.rb new file mode 100644 index 0000000000..22e66df38c --- /dev/null +++ b/spec/ruby/core/encoding/converter/finish_spec.rb @@ -0,0 +1,36 @@ +require_relative '../../../spec_helper' + +describe "Encoding::Converter#finish" do + before :each do + @ec = Encoding::Converter.new("utf-8", "iso-2022-jp") + end + + it "returns a String" do + @ec.convert('foo') + @ec.finish.should be_an_instance_of(String) + end + + it "returns an empty String if there is nothing more to convert" do + @ec.convert("glark") + @ec.finish.should == "" + 
end + + it "returns the last part of the converted String if it hasn't already" do + @ec.convert("\u{9999}").should == "\e$B9a".dup.force_encoding('iso-2022-jp') + @ec.finish.should == "\e(B".dup.force_encoding('iso-2022-jp') + end + + it "returns a String in the destination encoding" do + @ec.convert("glark") + @ec.finish.encoding.should == Encoding::ISO2022_JP + end + + it "returns an empty String if self was not given anything to convert" do + @ec.finish.should == "" + end + + it "returns an empty String on subsequent invocations" do + @ec.finish.should == "" + @ec.finish.should == "" + end +end diff --git a/spec/ruby/core/encoding/converter/insert_output_spec.rb b/spec/ruby/core/encoding/converter/insert_output_spec.rb new file mode 100644 index 0000000000..1346adde1e --- /dev/null +++ b/spec/ruby/core/encoding/converter/insert_output_spec.rb @@ -0,0 +1,5 @@ +require_relative '../../../spec_helper' + +describe "Encoding::Converter#insert_output" do + it "needs to be reviewed for spec completeness" +end diff --git a/spec/ruby/core/encoding/converter/inspect_spec.rb b/spec/ruby/core/encoding/converter/inspect_spec.rb new file mode 100644 index 0000000000..3170ee451f --- /dev/null +++ b/spec/ruby/core/encoding/converter/inspect_spec.rb @@ -0,0 +1,13 @@ +require_relative '../../../spec_helper' + +describe "Encoding::Converter#inspect" do + it "includes the source and destination encodings in the return value" do + source = Encoding::UTF_8 + destination = Encoding::UTF_16LE + + output = "#<Encoding::Converter: #{source.name} to #{destination.name}>" + + x = Encoding::Converter.new(source, destination) + x.inspect.should == output + end +end diff --git a/spec/ruby/core/encoding/converter/last_error_spec.rb b/spec/ruby/core/encoding/converter/last_error_spec.rb new file mode 100644 index 0000000000..ff2a2b4cbe --- /dev/null +++ b/spec/ruby/core/encoding/converter/last_error_spec.rb @@ -0,0 +1,91 @@ +# encoding: binary +require_relative '../../../spec_helper' + 
+describe "Encoding::Converter#last_error" do + it "returns nil when the no conversion has been attempted" do + ec = Encoding::Converter.new('ascii','utf-8') + ec.last_error.should be_nil + end + + it "returns nil when the last conversion did not produce an error" do + ec = Encoding::Converter.new('ascii','utf-8') + ec.convert('a'.dup.force_encoding('ascii')) + ec.last_error.should be_nil + end + + it "returns nil when #primitive_convert last returned :destination_buffer_full" do + ec = Encoding::Converter.new("utf-8", "iso-2022-jp") + ec.primitive_convert(+"\u{9999}", +"", 0, 0, partial_input: false) \ + .should == :destination_buffer_full + ec.last_error.should be_nil + end + + it "returns nil when #primitive_convert last returned :finished" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + ec.primitive_convert("glark".dup.force_encoding('utf-8'), +"").should == :finished + ec.last_error.should be_nil + end + + it "returns nil if the last conversion succeeded but the penultimate failed" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + ec.primitive_convert(+"\xf1abcd", +"").should == :invalid_byte_sequence + ec.primitive_convert("glark".dup.force_encoding('utf-8'), +"").should == :finished + ec.last_error.should be_nil + end + + it "returns an Encoding::InvalidByteSequenceError when #primitive_convert last returned :invalid_byte_sequence" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + ec.primitive_convert(+"\xf1abcd", +"").should == :invalid_byte_sequence + ec.last_error.should be_an_instance_of(Encoding::InvalidByteSequenceError) + end + + it "returns an Encoding::UndefinedConversionError when #primitive_convert last returned :undefined_conversion" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + ec.primitive_convert(+"\u{9876}", +"").should == :undefined_conversion + ec.last_error.should be_an_instance_of(Encoding::UndefinedConversionError) + end + + it "returns an Encoding::InvalidByteSequenceError when 
#primitive_convert last returned :incomplete_input" do + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + ec.primitive_convert(+"\xa4", +"", nil, 10).should == :incomplete_input + ec.last_error.should be_an_instance_of(Encoding::InvalidByteSequenceError) + end + + it "returns an Encoding::InvalidByteSequenceError when the last call to #convert produced one" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + exception = nil + -> { + ec.convert("\xf1abcd") + }.should raise_error(Encoding::InvalidByteSequenceError) { |e| + exception = e + } + ec.last_error.should be_an_instance_of(Encoding::InvalidByteSequenceError) + ec.last_error.message.should == exception.message + end + + it "returns an Encoding::UndefinedConversionError when the last call to #convert produced one" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + exception = nil + -> { + ec.convert("\u{9899}") + }.should raise_error(Encoding::UndefinedConversionError) { |e| + exception = e + } + ec.last_error.should be_an_instance_of(Encoding::UndefinedConversionError) + ec.last_error.message.should == exception.message + ec.last_error.message.should include "from UTF-8 to ISO-8859-1" + end + + it "returns the last error of #convert with a message showing the transcoding path" do + ec = Encoding::Converter.new("iso-8859-1", "Big5") + exception = nil + -> { + ec.convert("\xE9") # é in ISO-8859-1 + }.should raise_error(Encoding::UndefinedConversionError) { |e| + exception = e + } + ec.last_error.should be_an_instance_of(Encoding::UndefinedConversionError) + ec.last_error.message.should == exception.message + ec.last_error.message.should include "from ISO-8859-1 to UTF-8 to Big5" + end +end diff --git a/spec/ruby/core/encoding/converter/new_spec.rb b/spec/ruby/core/encoding/converter/new_spec.rb new file mode 100644 index 0000000000..a7bef53809 --- /dev/null +++ b/spec/ruby/core/encoding/converter/new_spec.rb @@ -0,0 +1,119 @@ +# encoding: binary +require_relative '../../../spec_helper' + 
+describe "Encoding::Converter.new" do + it "accepts a String for the source encoding" do + conv = Encoding::Converter.new("us-ascii", "utf-8") + conv.source_encoding.should == Encoding::US_ASCII + end + + it "accepts a String for the destination encoding" do + conv = Encoding::Converter.new("us-ascii", "utf-8") + conv.destination_encoding.should == Encoding::UTF_8 + end + + it "accepts an Encoding object for the source encoding" do + conv = Encoding::Converter.new(Encoding::US_ASCII, "utf-8") + conv.source_encoding.should == Encoding::US_ASCII + end + + it "accepts an Encoding object for the destination encoding" do + conv = Encoding::Converter.new("us-ascii", Encoding::UTF_8) + conv.destination_encoding.should == Encoding::UTF_8 + end + + it "raises an Encoding::ConverterNotFoundError if both encodings are the same" do + -> do + Encoding::Converter.new "utf-8", "utf-8" + end.should raise_error(Encoding::ConverterNotFoundError) + end + + it "calls #to_str to convert the source encoding argument to an encoding name" do + enc = mock("us-ascii") + enc.should_receive(:to_str).and_return("us-ascii") + conv = Encoding::Converter.new(enc, "utf-8") + conv.source_encoding.should == Encoding::US_ASCII + end + + it "calls #to_str to convert the destination encoding argument to an encoding name" do + enc = mock("utf-8") + enc.should_receive(:to_str).and_return("utf-8") + conv = Encoding::Converter.new("us-ascii", enc) + conv.destination_encoding.should == Encoding::UTF_8 + end + + it "sets replacement from the options Hash" do + conv = Encoding::Converter.new("us-ascii", "utf-8", replace: "fubar") + conv.replacement.should == "fubar" + end + + it "calls #to_hash to convert the options argument to a Hash if not an Integer" do + opts = mock("encoding converter options") + opts.should_receive(:to_hash).and_return({ replace: "fubar" }) + conv = Encoding::Converter.new("us-ascii", "utf-8", **opts) + conv.replacement.should == "fubar" + end + + it "calls #to_str to convert the 
replacement object to a String" do + obj = mock("encoding converter replacement") + obj.should_receive(:to_str).and_return("fubar") + conv = Encoding::Converter.new("us-ascii", "utf-8", replace: obj) + conv.replacement.should == "fubar" + end + + it "raises a TypeError if #to_str does not return a String" do + obj = mock("encoding converter replacement") + obj.should_receive(:to_str).and_return(1) + + -> do + Encoding::Converter.new("us-ascii", "utf-8", replace: obj) + end.should raise_error(TypeError) + end + + it "raises a TypeError if passed true for the replacement object" do + -> do + Encoding::Converter.new("us-ascii", "utf-8", replace: true) + end.should raise_error(TypeError) + end + + it "raises a TypeError if passed false for the replacement object" do + -> do + Encoding::Converter.new("us-ascii", "utf-8", replace: false) + end.should raise_error(TypeError) + end + + it "raises a TypeError if passed an Integer for the replacement object" do + -> do + Encoding::Converter.new("us-ascii", "utf-8", replace: 1) + end.should raise_error(TypeError) + end + + it "accepts an empty String for the replacement object" do + conv = Encoding::Converter.new("us-ascii", "utf-8", replace: "") + conv.replacement.should == "" + end + + describe "when passed nil for the replacement object" do + describe "when the destination encoding is not UTF-8" do + it "sets the replacement String to '?'" do + conv = Encoding::Converter.new("us-ascii", "binary", replace: nil) + conv.replacement.should == "?" 
+ end + + it "sets the replacement String encoding to US-ASCII" do + conv = Encoding::Converter.new("us-ascii", "binary", replace: nil) + conv.replacement.encoding.should == Encoding::US_ASCII + end + + it "sets the replacement String to '\\uFFFD'" do + conv = Encoding::Converter.new("us-ascii", "utf-8", replace: nil) + conv.replacement.should == "\u{fffd}".dup.force_encoding("utf-8") + end + + it "sets the replacement String encoding to UTF-8" do + conv = Encoding::Converter.new("us-ascii", "utf-8", replace: nil) + conv.replacement.encoding.should == Encoding::UTF_8 + end + end + end +end diff --git a/spec/ruby/core/encoding/converter/primitive_convert_spec.rb b/spec/ruby/core/encoding/converter/primitive_convert_spec.rb new file mode 100644 index 0000000000..e4aeed103e --- /dev/null +++ b/spec/ruby/core/encoding/converter/primitive_convert_spec.rb @@ -0,0 +1,216 @@ +# encoding: binary +# frozen_string_literal: false +require_relative '../../../spec_helper' + +describe "Encoding::Converter#primitive_convert" do + before :each do + @ec = Encoding::Converter.new("utf-8", "iso-8859-1") + end + + it "accepts a nil source buffer" do + -> { @ec.primitive_convert(nil,"") }.should_not raise_error + end + + it "accepts a String as the source buffer" do + -> { @ec.primitive_convert("","") }.should_not raise_error + end + + it "raises FrozenError when the destination buffer is a frozen String" do + -> { @ec.primitive_convert("", "".freeze) }.should raise_error(FrozenError) + end + + it "accepts nil for the destination byte offset" do + -> { @ec.primitive_convert("","", nil) }.should_not raise_error + end + + it "accepts an integer for the destination byte offset" do + -> { @ec.primitive_convert("","a", 1) }.should_not raise_error + end + + it "calls #to_int to convert the destination byte offset" do + offset = mock("encoding primitive_convert destination byte offset") + offset.should_receive(:to_int).and_return(2) + @ec.primitive_convert("abc", result = " ", offset).should 
== :finished + result.should == " abc" + end + + it "raises an ArgumentError if the destination byte offset is greater than the bytesize of the destination buffer" do + -> { @ec.primitive_convert("","am", 0) }.should_not raise_error + -> { @ec.primitive_convert("","am", 1) }.should_not raise_error + -> { @ec.primitive_convert("","am", 2) }.should_not raise_error + -> { @ec.primitive_convert("","am", 3) }.should raise_error(ArgumentError) + end + + it "uses the destination byte offset to determine where to write the result in the destination buffer" do + dest = "aa" + @ec.primitive_convert("b",dest, nil, 0) + dest.should == "aa" + + @ec.primitive_convert("b",dest, nil, 1) + dest.should == "aab" + + @ec.primitive_convert("b",dest, nil, 2) + dest.should == "aabbb" + end + + it "accepts nil for the destination bytesize" do + -> { @ec.primitive_convert("","", nil, nil) }.should_not raise_error + end + + it "accepts an integer for the destination bytesize" do + -> { @ec.primitive_convert("","", nil, 0) }.should_not raise_error + end + + it "allows a destination bytesize value greater than the bytesize of the source buffer" do + -> { @ec.primitive_convert("am","", nil, 3) }.should_not raise_error + end + + it "allows a destination bytesize value less than the bytesize of the source buffer" do + -> { @ec.primitive_convert("am","", nil, 1) }.should_not raise_error + end + + it "calls #to_int to convert the destination byte size" do + size = mock("encoding primitive_convert destination byte size") + size.should_receive(:to_int).and_return(2) + @ec.primitive_convert("abc", result = " ", 0, size).should == :destination_buffer_full + result.should == "ab" + end + + it "uses destination bytesize as the maximum bytesize of the destination buffer" do + dest = "" + @ec.primitive_convert("glark", dest, nil, 1) + dest.bytesize.should == 1 + end + + it "allows a destination buffer of unlimited size if destination bytesize is nil" do + source = "glark".force_encoding('utf-8') + dest = 
"" + @ec.primitive_convert("glark", dest, nil, nil) + dest.bytesize.should == source.bytesize + end + + it "accepts an options hash" do + @ec.primitive_convert("","",nil,nil, after_output: true).should == :finished + end + + it "sets the destination buffer's encoding to the destination encoding if the conversion succeeded" do + dest = "".force_encoding('utf-8') + dest.encoding.should == Encoding::UTF_8 + @ec.primitive_convert("\u{98}",dest).should == :finished + dest.encoding.should == Encoding::ISO_8859_1 + end + + it "sets the destination buffer's encoding to the destination encoding if the conversion failed" do + dest = "".force_encoding('utf-8') + dest.encoding.should == Encoding::UTF_8 + @ec.primitive_convert("\u{9878}",dest).should == :undefined_conversion + dest.encoding.should == Encoding::ISO_8859_1 + end + + it "removes the undefined part from the source buffer when returning :undefined_conversion" do + dest = "".force_encoding('utf-8') + s = "\u{9878}abcd" + @ec.primitive_convert(s, dest).should == :undefined_conversion + + s.should == "abcd" + end + + it "returns :incomplete_input when source buffer ends unexpectedly and :partial_input isn't specified" do + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + ec.primitive_convert("\xa4", "", nil, nil, partial_input: false).should == :incomplete_input + end + + it "clears the source buffer when returning :incomplete_input" do + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + s = "\xa4" + ec.primitive_convert(s, "").should == :incomplete_input + + s.should == "" + end + + it "returns :source_buffer_empty when source buffer ends unexpectedly and :partial_input is true" do + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + ec.primitive_convert("\xa4", "", nil, nil, partial_input: true).should == :source_buffer_empty + end + + it "clears the source buffer when returning :source_buffer_empty" do + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + s = "\xa4" + ec.primitive_convert(s, "", 
nil, nil, partial_input: true).should == :source_buffer_empty + + s.should == "" + end + + it "returns :undefined_conversion when a character in the source buffer is not representable in the output encoding" do + @ec.primitive_convert("\u{9876}","").should == :undefined_conversion + end + + it "returns :invalid_byte_sequence when an invalid byte sequence was found in the source buffer" do + @ec.primitive_convert("\xf1abcd","").should == :invalid_byte_sequence + end + + it "removes consumed and erroneous bytes from the source buffer when returning :invalid_byte_sequence" do + ec = Encoding::Converter.new(Encoding::UTF_8, Encoding::UTF_8_MAC) + s = "\xC3\xA1\x80\x80\xC3\xA1".force_encoding("utf-8") + dest = "".force_encoding("utf-8") + ec.primitive_convert(s, dest) + + s.should == "\x80\xC3\xA1".force_encoding("utf-8") + end + + it "returns :finished when the conversion succeeded" do + @ec.primitive_convert("glark".force_encoding('utf-8'),"").should == :finished + end + + it "clears the source buffer when returning :finished" do + s = "glark".force_encoding('utf-8') + @ec.primitive_convert(s, "").should == :finished + + s.should == "" + end + + it "returns :destination_buffer_full when the destination buffer is too small" do + ec = Encoding::Converter.new("utf-8", "iso-2022-jp") + source = "\u{9999}" + destination_bytesize = source.bytesize - 1 + ec.primitive_convert(source, "", 0, destination_bytesize) \ + .should == :destination_buffer_full + source.should == "" + end + + it "clears the source buffer when returning :destination_buffer_full" do + ec = Encoding::Converter.new("utf-8", "iso-2022-jp") + s = "\u{9999}" + destination_bytesize = s.bytesize - 1 + ec.primitive_convert(s, "", 0, destination_bytesize).should == :destination_buffer_full + + s.should == "" + end + + it "keeps removing invalid bytes from the source buffer" do + ec = Encoding::Converter.new(Encoding::UTF_8, Encoding::UTF_8_MAC) + s = "\x80\x80\x80" + dest = "".force_encoding(Encoding::UTF_8_MAC) 
+ + ec.primitive_convert(s, dest) + s.should == "\x80\x80" + ec.primitive_convert(s, dest) + s.should == "\x80" + ec.primitive_convert(s, dest) + s.should == "" + end + + it "reuses read-again bytes after the first error" do + s = "\xf1abcd" + dest = "" + + @ec.primitive_convert(s, dest).should == :invalid_byte_sequence + s.should == "bcd" + @ec.primitive_errinfo[4].should == "a" + + @ec.primitive_convert(s, dest).should == :finished + s.should == "" + + dest.should == "abcd" + end +end diff --git a/spec/ruby/core/encoding/converter/primitive_errinfo_spec.rb b/spec/ruby/core/encoding/converter/primitive_errinfo_spec.rb new file mode 100644 index 0000000000..5ee8b1fecd --- /dev/null +++ b/spec/ruby/core/encoding/converter/primitive_errinfo_spec.rb @@ -0,0 +1,69 @@ +# encoding: binary +# frozen_string_literal: false +require_relative '../../../spec_helper' + +describe "Encoding::Converter#primitive_errinfo" do + it "returns [:source_buffer_empty,nil,nil,nil,nil] when no conversion has been attempted" do + ec = Encoding::Converter.new('ascii','utf-8') + ec.primitive_errinfo.should == [:source_buffer_empty, nil, nil, nil, nil] + end + + it "returns [:finished,nil,nil,nil,nil] when #primitive_convert last returned :finished" do + ec = Encoding::Converter.new('ascii','utf-8') + ec.primitive_convert("a","").should == :finished + ec.primitive_errinfo.should == [:finished, nil, nil, nil, nil] + end + + it "returns [:source_buffer_empty,nil,nil,nil, nil] when #convert last succeeded" do + ec = Encoding::Converter.new('ascii','utf-8') + ec.convert("a".force_encoding('ascii')).should == "a".force_encoding('utf-8') + ec.primitive_errinfo.should == [:source_buffer_empty, nil, nil, nil, nil] + end + + it "returns [:destination_buffer_full,nil,nil,nil,nil] when #primitive_convert last returned :destination_buffer_full" do + ec = Encoding::Converter.new("utf-8", "iso-2022-jp") + ec.primitive_convert("\u{9999}", "", 0, 0, partial_input: false).should == :destination_buffer_full + 
ec.primitive_errinfo.should == [:destination_buffer_full, nil, nil, nil, nil] + end + + it "returns the status of the last primitive conversion, even if it was successful and the previous one wasn't" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + ec.primitive_convert("\xf1abcd","").should == :invalid_byte_sequence + ec.primitive_convert("glark".force_encoding('utf-8'),"").should == :finished + ec.primitive_errinfo.should == [:finished, nil, nil, nil, nil] + end + + it "returns the state, source encoding, target encoding, and the erroneous bytes when #primitive_convert last returned :undefined_conversion" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + ec.primitive_convert("\u{9876}","").should == :undefined_conversion + ec.primitive_errinfo.should == + [:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE9\xA1\xB6", ""] + end + + it "returns the state, source encoding, target encoding, and erroneous bytes when #primitive_convert last returned :incomplete_input" do + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + ec.primitive_convert("\xa4", "", nil, 10).should == :incomplete_input + ec.primitive_errinfo.should == [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", ""] + end + + it "returns the state, source encoding, target encoding, erroneous bytes, and the read-again bytes when #primitive_convert last returned :invalid_byte_sequence" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + ec.primitive_convert("\xf1abcd","").should == :invalid_byte_sequence + ec.primitive_errinfo.should == + [:invalid_byte_sequence, "UTF-8", "ISO-8859-1", "\xF1", "a"] + end + + it "returns the state, source encoding, target encoding, erroneous bytes, and the read-again bytes when #convert last raised InvalidByteSequenceError" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + -> { ec.convert("\xf1abcd") }.should raise_error(Encoding::InvalidByteSequenceError) + ec.primitive_errinfo.should == + [:invalid_byte_sequence, "UTF-8", "ISO-8859-1", 
"\xF1", "a"] + end + + it "returns the state, source encoding, target encoding, erroneous bytes, and the read-again bytes when #finish last raised InvalidByteSequenceError" do + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + ec.convert("\xa4") + -> { ec.finish }.should raise_error(Encoding::InvalidByteSequenceError) + ec.primitive_errinfo.should == [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", ""] + end +end diff --git a/spec/ruby/core/encoding/converter/putback_spec.rb b/spec/ruby/core/encoding/converter/putback_spec.rb new file mode 100644 index 0000000000..04bb565655 --- /dev/null +++ b/spec/ruby/core/encoding/converter/putback_spec.rb @@ -0,0 +1,56 @@ +# encoding: binary +require_relative '../../../spec_helper' + +describe "Encoding::Converter#putback" do + before :each do + @ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + @ret = @ec.primitive_convert(@src=+"abc\xa1def", @dst=+"", nil, 10) + end + + it "returns a String" do + @ec.putback.should be_an_instance_of(String) + end + + it "returns a String in the source encoding" do + @ec.putback.encoding.should == Encoding::EUC_JP + end + + it "returns the bytes buffered due to an :invalid_byte_sequence error" do + @ret.should == :invalid_byte_sequence + @ec.putback.should == 'd' + @ec.primitive_errinfo.last.should == 'd' + end + + it "allows conversion to be resumed after an :invalid_byte_sequence" do + @src = @ec.putback + @src + @ret = @ec.primitive_convert(@src, @dst, nil, 10) + @ret.should == :finished + @dst.should == "abcdef" + @src.should == "" + end + + it "returns an empty String when there are no more bytes to put back" do + @ec.putback + @ec.putback.should == "" + end + + it "returns the problematic bytes for UTF-16LE" do + ec = Encoding::Converter.new("utf-16le", "iso-8859-1") + src = +"\x00\xd8\x61\x00" + dst = +"" + ec.primitive_convert(src, dst).should == :invalid_byte_sequence + ec.primitive_errinfo.should == [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "a\x00"] + 
ec.putback.should == "a\x00".dup.force_encoding("utf-16le") + ec.putback.should == "" + end + + it "accepts an integer argument corresponding to the number of bytes to be put back" do + ec = Encoding::Converter.new("utf-16le", "iso-8859-1") + src = +"\x00\xd8\x61\x00" + dst = +"" + ec.primitive_convert(src, dst).should == :invalid_byte_sequence + ec.primitive_errinfo.should == [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "a\x00"] + ec.putback(2).should == "a\x00".dup.force_encoding("utf-16le") + ec.putback.should == "" + end +end diff --git a/spec/ruby/core/encoding/converter/replacement_spec.rb b/spec/ruby/core/encoding/converter/replacement_spec.rb new file mode 100644 index 0000000000..ea514ca8dd --- /dev/null +++ b/spec/ruby/core/encoding/converter/replacement_spec.rb @@ -0,0 +1,72 @@ +require_relative '../../../spec_helper' + +describe "Encoding::Converter#replacement" do + it "returns '?' in US-ASCII when the destination encoding is not UTF-8" do + ec = Encoding::Converter.new("utf-8", "us-ascii") + ec.replacement.should == "?" + ec.replacement.encoding.should == Encoding::US_ASCII + + ec = Encoding::Converter.new("utf-8", "sjis") + ec.replacement.should == "?" + ec.replacement.encoding.should == Encoding::US_ASCII + end + + it "returns \\uFFFD when the destination encoding is UTF-8" do + ec = Encoding::Converter.new("us-ascii", "utf-8") + ec.replacement.should == "\u{fffd}".dup.force_encoding('utf-8') + ec.replacement.encoding.should == Encoding::UTF_8 + end +end + +describe "Encoding::Converter#replacement=" do + it "accepts a String argument" do + ec = Encoding::Converter.new("utf-8", "us-ascii") + ec.replacement = "!" + ec.replacement.should == "!" + end + + it "accepts a String argument of arbitrary length" do + ec = Encoding::Converter.new("utf-8", "us-ascii") + ec.replacement = "?!?" * 9999 + ec.replacement.should == "?!?" 
* 9999 + end + + it "raises a TypeError if assigned a non-String argument" do + ec = Encoding::Converter.new("utf-8", "us-ascii") + -> { ec.replacement = nil }.should raise_error(TypeError) + end + + it "sets #replacement" do + ec = Encoding::Converter.new("us-ascii", "utf-8") + ec.replacement.should == "\u{fffd}".dup.force_encoding('utf-8') + ec.replacement = '?'.encode('utf-8') + ec.replacement.should == '?'.dup.force_encoding('utf-8') + end + + it "raises an UndefinedConversionError is the argument cannot be converted into the destination encoding" do + ec = Encoding::Converter.new("sjis", "ascii") + utf8_q = "\u{986}".dup.force_encoding('utf-8') + ec.primitive_convert(utf8_q.dup, +"").should == :undefined_conversion + -> { ec.replacement = utf8_q }.should \ + raise_error(Encoding::UndefinedConversionError) + end + + it "does not change the replacement character if the argument cannot be converted into the destination encoding" do + ec = Encoding::Converter.new("sjis", "ascii") + utf8_q = "\u{986}".dup.force_encoding('utf-8') + ec.primitive_convert(utf8_q.dup, +"").should == :undefined_conversion + -> { ec.replacement = utf8_q }.should \ + raise_error(Encoding::UndefinedConversionError) + ec.replacement.should == "?".dup.force_encoding('us-ascii') + end + + it "uses the replacement character" do + ec = Encoding::Converter.new("utf-8", "us-ascii", :invalid => :replace, :undef => :replace) + ec.replacement = "!" 
+ dest = +"" + status = ec.primitive_convert(+"中文123", dest) + + status.should == :finished + dest.should == "!!123" + end +end diff --git a/spec/ruby/core/encoding/converter/search_convpath_spec.rb b/spec/ruby/core/encoding/converter/search_convpath_spec.rb new file mode 100644 index 0000000000..59fe4520c0 --- /dev/null +++ b/spec/ruby/core/encoding/converter/search_convpath_spec.rb @@ -0,0 +1,30 @@ +require_relative '../../../spec_helper' + +describe "Encoding::Converter.search_convpath" do + it "returns an Array with a single element if there is a direct converter" do + cp = Encoding::Converter.search_convpath('ASCII', 'UTF-8') + cp.should == [[Encoding::US_ASCII, Encoding::UTF_8]] + end + + it "returns multiple encoding pairs when direct conversion is impossible" do + cp = Encoding::Converter.search_convpath('ascii','Big5') + cp.should == [ + [Encoding::US_ASCII, Encoding::UTF_8], + [Encoding::UTF_8, Encoding::Big5] + ] + end + + it "indicates if crlf_newline conversion would occur" do + cp = Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", crlf_newline: true) + cp.last.should == "crlf_newline" + + cp = Encoding::Converter.search_convpath("ASCII", "UTF-8", crlf_newline: false) + cp.last.should_not == "crlf_newline" + end + + it "raises an Encoding::ConverterNotFoundError if no conversion path exists" do + -> do + Encoding::Converter.search_convpath(Encoding::BINARY, Encoding::Emacs_Mule) + end.should raise_error(Encoding::ConverterNotFoundError) + end +end diff --git a/spec/ruby/core/encoding/converter/source_encoding_spec.rb b/spec/ruby/core/encoding/converter/source_encoding_spec.rb new file mode 100644 index 0000000000..6196f717bd --- /dev/null +++ b/spec/ruby/core/encoding/converter/source_encoding_spec.rb @@ -0,0 +1,11 @@ +require_relative '../../../spec_helper' + +describe "Encoding::Converter#source_encoding" do + it "returns the source encoding as an Encoding object" do + ec = Encoding::Converter.new('ASCII','Big5') + 
ec.source_encoding.should == Encoding::US_ASCII + + ec = Encoding::Converter.new('Shift_JIS','EUC-JP') + ec.source_encoding.should == Encoding::SHIFT_JIS + end +end diff --git a/spec/ruby/core/encoding/default_external_spec.rb b/spec/ruby/core/encoding/default_external_spec.rb new file mode 100644 index 0000000000..9aae4976e0 --- /dev/null +++ b/spec/ruby/core/encoding/default_external_spec.rb @@ -0,0 +1,69 @@ +require_relative '../../spec_helper' + +describe "Encoding.default_external" do + before :each do + @original_encoding = Encoding.default_external + end + + after :each do + Encoding.default_external = @original_encoding + end + + it "returns an Encoding object" do + Encoding.default_external.should be_an_instance_of(Encoding) + end + + it "returns the default external encoding" do + Encoding.default_external = Encoding::SHIFT_JIS + Encoding.default_external.should == Encoding::SHIFT_JIS + end + + platform_is :windows do + it 'is UTF-8 by default on Windows' do + Encoding.default_external.should == Encoding::UTF_8 + end + end +end + +describe "Encoding.default_external=" do + before :each do + @original_encoding = Encoding.default_external + end + + after :each do + Encoding.default_external = @original_encoding + end + + it "sets the default external encoding" do + Encoding.default_external = Encoding::SHIFT_JIS + Encoding.default_external.should == Encoding::SHIFT_JIS + Encoding.find('external').should == Encoding::SHIFT_JIS + end + + platform_is_not :windows do + it "also sets the filesystem encoding" do + Encoding.default_external = Encoding::SHIFT_JIS + Encoding.find('filesystem').should == Encoding::SHIFT_JIS + end + end + + it "can accept a name of an encoding as a String" do + Encoding.default_external = 'Shift_JIS' + Encoding.default_external.should == Encoding::SHIFT_JIS + end + + it "calls #to_s on arguments that are neither Strings nor Encodings" do + string = mock('string') + string.should_receive(:to_str).at_least(1).and_return('US-ASCII') + 
Encoding.default_external = string + Encoding.default_external.should == Encoding::ASCII + end + + it "raises a TypeError unless the argument is an Encoding or convertible to a String" do + -> { Encoding.default_external = [] }.should raise_error(TypeError) + end + + it "raises an ArgumentError if the argument is nil" do + -> { Encoding.default_external = nil }.should raise_error(ArgumentError) + end +end diff --git a/spec/ruby/core/encoding/default_internal_spec.rb b/spec/ruby/core/encoding/default_internal_spec.rb new file mode 100644 index 0000000000..855f4e9f32 --- /dev/null +++ b/spec/ruby/core/encoding/default_internal_spec.rb @@ -0,0 +1,74 @@ +require_relative '../../spec_helper' + +describe "Encoding.default_internal" do + before :each do + @original_encoding = Encoding.default_internal + end + + after :each do + Encoding.default_internal = @original_encoding + end + + it "is nil by default" do + Encoding.default_internal.should be_nil + end + + it "returns an Encoding object if a default internal encoding is set" do + Encoding.default_internal = Encoding::ASCII + Encoding.default_internal.should be_an_instance_of(Encoding) + end + + it "returns nil if no default internal encoding is set" do + Encoding.default_internal = nil + Encoding.default_internal.should be_nil + end + + it "returns the default internal encoding" do + Encoding.default_internal = Encoding::BINARY + Encoding.default_internal.should == Encoding::BINARY + end +end + +describe "Encoding.default_internal=" do + before :each do + @original_encoding = Encoding.default_internal + end + + after :each do + Encoding.default_internal = @original_encoding + end + + it "sets the default internal encoding" do + Encoding.default_internal = Encoding::SHIFT_JIS + Encoding.default_internal.should == Encoding::SHIFT_JIS + end + + it "can accept a name of an encoding as a String" do + Encoding.default_internal = 'Shift_JIS' + Encoding.default_internal.should == Encoding::SHIFT_JIS + end + + it "calls 
#to_str to convert an object to a String" do + obj = mock('string') + obj.should_receive(:to_str).at_least(1).times.and_return('ascii') + + Encoding.default_internal = obj + Encoding.default_internal.should == Encoding::ASCII + end + + it "raises a TypeError if #to_str does not return a String" do + obj = mock('string') + obj.should_receive(:to_str).at_least(1).times.and_return(1) + + -> { Encoding.default_internal = obj }.should raise_error(TypeError) + end + + it "raises a TypeError when passed an object not providing #to_str" do + -> { Encoding.default_internal = mock("encoding") }.should raise_error(TypeError) + end + + it "accepts an argument of nil to unset the default internal encoding" do + Encoding.default_internal = nil + Encoding.default_internal.should be_nil + end +end diff --git a/spec/ruby/core/encoding/dummy_spec.rb b/spec/ruby/core/encoding/dummy_spec.rb new file mode 100644 index 0000000000..75ffcd5a4e --- /dev/null +++ b/spec/ruby/core/encoding/dummy_spec.rb @@ -0,0 +1,14 @@ +require_relative '../../spec_helper' + +describe "Encoding#dummy?" 
do + it "returns false for proper encodings" do + Encoding::UTF_8.dummy?.should be_false + Encoding::ASCII.dummy?.should be_false + end + + it "returns true for dummy encodings" do + Encoding::ISO_2022_JP.dummy?.should be_true + Encoding::CP50221.dummy?.should be_true + Encoding::UTF_7.dummy?.should be_true + end +end diff --git a/spec/ruby/core/encoding/find_spec.rb b/spec/ruby/core/encoding/find_spec.rb new file mode 100644 index 0000000000..9c34fe0e77 --- /dev/null +++ b/spec/ruby/core/encoding/find_spec.rb @@ -0,0 +1,82 @@ +require_relative '../../spec_helper' + +describe "Encoding.find" do + before :all do + @encodings = Encoding.aliases.to_a.flatten.uniq + end + + it "returns the corresponding Encoding object if given a valid encoding name" do + @encodings.each do |enc| + Encoding.find(enc).should be_an_instance_of(Encoding) + end + end + + it "returns the corresponding Encoding object if given a valid alias name" do + Encoding.aliases.keys.each do |enc_alias| + Encoding.find(enc_alias).should be_an_instance_of(Encoding) + end + end + + it "raises a TypeError if passed a Symbol" do + -> { Encoding.find(:"utf-8") }.should raise_error(TypeError) + end + + it "returns the passed Encoding object" do + Encoding.find(Encoding::UTF_8).should == Encoding::UTF_8 + end + + it "accepts encoding names as Strings" do + Encoding.list.each do |enc| + Encoding.find(enc.name).should == enc + end + end + + it "accepts any object as encoding name, if it responds to #to_str" do + obj = Class.new do + attr_writer :encoding_name + def to_str; @encoding_name; end + end.new + + Encoding.list.each do |enc| + obj.encoding_name = enc.name + Encoding.find(obj).should == enc + end + end + + it "is case insensitive" do + @encodings.each do |enc| + Encoding.find(enc.upcase).should == Encoding.find(enc) + end + end + + it "raises an ArgumentError if the given encoding does not exist" do + -> { Encoding.find('dh2dh278d') }.should raise_error(ArgumentError, 'unknown encoding name - 
dh2dh278d') + end + + # Not sure how to do a better test, since locale depends on weird platform-specific stuff + it "supports the 'locale' encoding alias" do + enc = Encoding.find('locale') + enc.should_not == nil + end + + it "returns default external encoding for the 'external' encoding alias" do + enc = Encoding.find('external') + enc.should == Encoding.default_external + end + + it "returns default internal encoding for the 'internal' encoding alias" do + enc = Encoding.find('internal') + enc.should == Encoding.default_internal + end + + platform_is_not :windows do + it "uses default external encoding for the 'filesystem' encoding alias" do + enc = Encoding.find('filesystem') + enc.should == Encoding.default_external + end + end + + platform_is :windows do + it "needs to be reviewed for spec completeness" + end +end diff --git a/spec/ruby/core/encoding/fixtures/classes.rb b/spec/ruby/core/encoding/fixtures/classes.rb new file mode 100644 index 0000000000..943865e8d8 --- /dev/null +++ b/spec/ruby/core/encoding/fixtures/classes.rb @@ -0,0 +1,49 @@ +# encoding: binary +module EncodingSpecs + class UndefinedConversionError + def self.exception + ec = Encoding::Converter.new('utf-8','ascii') + begin + ec.convert("\u{8765}") + rescue Encoding::UndefinedConversionError => e + e + end + end + end + + class UndefinedConversionErrorIndirect + def self.exception + ec = Encoding::Converter.new("ISO-8859-1", "EUC-JP") + begin + ec.convert("\xA0") + rescue Encoding::UndefinedConversionError => e + e + end + end + end + + class InvalidByteSequenceError + def self.exception + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + begin + ec.convert("\xf1abcd") + rescue Encoding::InvalidByteSequenceError => e + # Return the exception object and the primitive_errinfo Array + [e, ec.primitive_errinfo] + end + end + end + + class InvalidByteSequenceErrorIndirect + def self.exception + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + begin + ec.convert("abc\xA1\xFFdef") + 
rescue Encoding::InvalidByteSequenceError => e + # Return the exception object and the discarded bytes reported by + # #primitive_errinfo + [e, ec.primitive_errinfo] + end + end + end +end diff --git a/spec/ruby/core/encoding/inspect_spec.rb b/spec/ruby/core/encoding/inspect_spec.rb new file mode 100644 index 0000000000..df96141db9 --- /dev/null +++ b/spec/ruby/core/encoding/inspect_spec.rb @@ -0,0 +1,33 @@ +require_relative '../../spec_helper' + +describe "Encoding#inspect" do + it "returns a String" do + Encoding::UTF_8.inspect.should be_an_instance_of(String) + end + + ruby_version_is ""..."3.4" do + it "returns #<Encoding:name> for a non-dummy encoding named 'name'" do + Encoding.list.to_a.reject {|e| e.dummy? }.each do |enc| + enc.inspect.should =~ /#<Encoding:#{enc.name}>/ + end + end + end + + ruby_version_is "3.4" do + it "returns #<Encoding:name> for a non-dummy encoding named 'name'" do + Encoding.list.to_a.reject {|e| e.dummy? }.each do |enc| + if enc.name == "ASCII-8BIT" + enc.inspect.should == "#<Encoding:BINARY (ASCII-8BIT)>" + else + enc.inspect.should =~ /#<Encoding:#{enc.name}>/ + end + end + end + end + + it "returns #<Encoding:name (dummy)> for a dummy encoding named 'name'" do + Encoding.list.to_a.select {|e| e.dummy? 
}.each do |enc| + enc.inspect.should =~ /#<Encoding:#{enc.name} \(dummy\)>/ + end + end +end diff --git a/spec/ruby/core/encoding/invalid_byte_sequence_error/destination_encoding_name_spec.rb b/spec/ruby/core/encoding/invalid_byte_sequence_error/destination_encoding_name_spec.rb new file mode 100644 index 0000000000..2b15fc1a0f --- /dev/null +++ b/spec/ruby/core/encoding/invalid_byte_sequence_error/destination_encoding_name_spec.rb @@ -0,0 +1,19 @@ +require_relative "../../../spec_helper" +require_relative '../fixtures/classes' + +describe "Encoding::InvalidByteSequenceError#destination_encoding_name" do + before :each do + @exception, = EncodingSpecs::InvalidByteSequenceError.exception + @exception2, = EncodingSpecs::InvalidByteSequenceErrorIndirect.exception + end + + it "returns a String" do + @exception.destination_encoding_name.should be_an_instance_of(String) + @exception2.destination_encoding_name.should be_an_instance_of(String) + end + + it "is equal to the destination encoding name of the object that raised it" do + @exception.destination_encoding_name.should == "ISO-8859-1" + @exception2.destination_encoding_name.should == "UTF-8" + end +end diff --git a/spec/ruby/core/encoding/invalid_byte_sequence_error/destination_encoding_spec.rb b/spec/ruby/core/encoding/invalid_byte_sequence_error/destination_encoding_spec.rb new file mode 100644 index 0000000000..c2ed6de1d8 --- /dev/null +++ b/spec/ruby/core/encoding/invalid_byte_sequence_error/destination_encoding_spec.rb @@ -0,0 +1,19 @@ +require_relative "../../../spec_helper" +require_relative '../fixtures/classes' + +describe "Encoding::InvalidByteSequenceError#destination_encoding" do + before :each do + @exception, = EncodingSpecs::InvalidByteSequenceError.exception + @exception2, = EncodingSpecs::InvalidByteSequenceErrorIndirect.exception + end + + it "returns an Encoding object" do + @exception.destination_encoding.should be_an_instance_of(Encoding) + @exception2.destination_encoding.should 
be_an_instance_of(Encoding) + end + + it "is equal to the destination encoding of the object that raised it" do + @exception.destination_encoding.should == Encoding::ISO_8859_1 + @exception2.destination_encoding.should == Encoding::UTF_8 + end +end diff --git a/spec/ruby/core/encoding/invalid_byte_sequence_error/error_bytes_spec.rb b/spec/ruby/core/encoding/invalid_byte_sequence_error/error_bytes_spec.rb new file mode 100644 index 0000000000..8b7e87960f --- /dev/null +++ b/spec/ruby/core/encoding/invalid_byte_sequence_error/error_bytes_spec.rb @@ -0,0 +1,31 @@ +# encoding: binary +require_relative "../../../spec_helper" +require_relative '../fixtures/classes' + +describe "Encoding::InvalidByteSequenceError#error_bytes" do + before :each do + @exception, @errinfo = EncodingSpecs::InvalidByteSequenceError.exception + @exception2, @errinfo2 = EncodingSpecs::InvalidByteSequenceErrorIndirect.exception + end + + it "returns a String" do + @exception.error_bytes.should be_an_instance_of(String) + @exception2.error_bytes.should be_an_instance_of(String) + end + + it "returns the bytes that caused the exception" do + @exception.error_bytes.size.should == 1 + @exception.error_bytes.should == "\xF1" + @exception.error_bytes.should == @errinfo[-2] + + @exception2.error_bytes.size.should == 1 + @exception2.error_bytes.should == "\xA1" + @exception2.error_bytes.should == @errinfo2[-2] + end + + it "uses BINARY as the encoding" do + @exception.error_bytes.encoding.should == Encoding::BINARY + + @exception2.error_bytes.encoding.should == Encoding::BINARY + end +end diff --git a/spec/ruby/core/encoding/invalid_byte_sequence_error/incomplete_input_spec.rb b/spec/ruby/core/encoding/invalid_byte_sequence_error/incomplete_input_spec.rb new file mode 100644 index 0000000000..83606f77b4 --- /dev/null +++ b/spec/ruby/core/encoding/invalid_byte_sequence_error/incomplete_input_spec.rb @@ -0,0 +1,28 @@ +# encoding: binary +require_relative '../../../spec_helper' + +describe 
"Encoding::InvalidByteSequenceError#incomplete_input?" do + it "returns nil by default" do + Encoding::InvalidByteSequenceError.new.incomplete_input?.should be_nil + end + + it "returns true if #primitive_convert returned :incomplete_input for the same data" do + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + ec.primitive_convert(+"\xA1", +'').should == :incomplete_input + begin + ec.convert("\xA1") + rescue Encoding::InvalidByteSequenceError => e + e.incomplete_input?.should be_true + end + end + + it "returns false if #primitive_convert returned :invalid_byte_sequence for the same data" do + ec = Encoding::Converter.new("ascii", "utf-8") + ec.primitive_convert(+"\xfffffffff", +'').should == :invalid_byte_sequence + begin + ec.convert("\xfffffffff") + rescue Encoding::InvalidByteSequenceError => e + e.incomplete_input?.should be_false + end + end +end diff --git a/spec/ruby/core/encoding/invalid_byte_sequence_error/readagain_bytes_spec.rb b/spec/ruby/core/encoding/invalid_byte_sequence_error/readagain_bytes_spec.rb new file mode 100644 index 0000000000..e5ad0a61bd --- /dev/null +++ b/spec/ruby/core/encoding/invalid_byte_sequence_error/readagain_bytes_spec.rb @@ -0,0 +1,31 @@ +# encoding: binary +require_relative "../../../spec_helper" +require_relative '../fixtures/classes' + +describe "Encoding::InvalidByteSequenceError#readagain_bytes" do + before :each do + @exception, @errinfo = EncodingSpecs::InvalidByteSequenceError.exception + @exception2, @errinfo2 = EncodingSpecs::InvalidByteSequenceErrorIndirect.exception + end + + it "returns a String" do + @exception.readagain_bytes.should be_an_instance_of(String) + @exception2.readagain_bytes.should be_an_instance_of(String) + end + + it "returns the bytes to be read again" do + @exception.readagain_bytes.size.should == 1 + @exception.readagain_bytes.should == "a".dup.force_encoding('binary') + @exception.readagain_bytes.should == @errinfo[-1] + + @exception2.readagain_bytes.size.should == 1 + 
@exception2.readagain_bytes.should == "\xFF".dup.force_encoding('binary') + @exception2.readagain_bytes.should == @errinfo2[-1] + end + + it "uses BINARY as the encoding" do + @exception.readagain_bytes.encoding.should == Encoding::BINARY + + @exception2.readagain_bytes.encoding.should == Encoding::BINARY + end +end diff --git a/spec/ruby/core/encoding/invalid_byte_sequence_error/source_encoding_name_spec.rb b/spec/ruby/core/encoding/invalid_byte_sequence_error/source_encoding_name_spec.rb new file mode 100644 index 0000000000..a9464114a8 --- /dev/null +++ b/spec/ruby/core/encoding/invalid_byte_sequence_error/source_encoding_name_spec.rb @@ -0,0 +1,29 @@ +require_relative "../../../spec_helper" +require_relative '../fixtures/classes' + +describe "Encoding::UndefinedConversionError#source_encoding_name" do + before :each do + @exception, = EncodingSpecs::UndefinedConversionError.exception + @exception2, = EncodingSpecs::UndefinedConversionErrorIndirect.exception + end + + it "returns a String" do + @exception.source_encoding_name.should be_an_instance_of(String) + end + + it "is equal to the source encoding name of the object that raised it" do + @exception.source_encoding_name.should == "UTF-8" + end + + # The source encoding specified in the Encoding::Converter constructor may + # differ from the source encoding returned here. What seems to happen is + # that when transcoding along a path with multiple pairs of encodings, the + # last one encountered when the error occurred is returned. So in this + # case, the conversion path is ISO-8859-1 -> UTF-8 -> EUC-JP. The + # conversion from ISO-8859-1 -> UTF-8 succeeded, but the conversion from + # UTF-8 to EUC-JP failed. IOW, it failed when the source encoding was + # UTF-8, so UTF-8 is regarded as the source encoding. 
+ it "is equal to the source encoding at the stage of the conversion path where the error occurred" do + @exception2.source_encoding_name.should == 'UTF-8' + end +end diff --git a/spec/ruby/core/encoding/invalid_byte_sequence_error/source_encoding_spec.rb b/spec/ruby/core/encoding/invalid_byte_sequence_error/source_encoding_spec.rb new file mode 100644 index 0000000000..7fdc0a122b --- /dev/null +++ b/spec/ruby/core/encoding/invalid_byte_sequence_error/source_encoding_spec.rb @@ -0,0 +1,34 @@ +require_relative "../../../spec_helper" +require_relative '../fixtures/classes' + +describe "Encoding::InvalidByteSequenceError#source_encoding" do + before :each do + @exception, = EncodingSpecs::InvalidByteSequenceError.exception + @exception2, = EncodingSpecs::InvalidByteSequenceErrorIndirect.exception + end + + it "returns an Encoding object" do + @exception.source_encoding.should be_an_instance_of(Encoding) + @exception2.source_encoding.should be_an_instance_of(Encoding) + end + + it "is equal to the source encoding of the object that raised it" do + @exception.source_encoding.should == Encoding::UTF_8 + end + + # The source encoding specified in the Encoding::Converter constructor may + # differ from the source encoding returned here. What seems to happen is + # that when transcoding along a path with multiple pairs of encodings, the + # last one encountered when the error occurred is returned. So in this + # case, the conversion path is EUC-JP -> UTF-8 -> ISO-8859-1. The + # conversions failed with the first pair of encodings (i.e. transcoding + # from EUC-JP to UTF-8, so UTF-8 is regarded as the source encoding; if + # the error had occurred when converting from UTF-8 to ISO-8859-1, UTF-8 + # would have been the source encoding. 
+ + # FIXME: Derive example where the failure occurs at the UTF-8 -> + # ISO-8859-1 case so as to better illustrate the issue + it "is equal to the source encoding at the stage of the conversion path where the error occurred" do + @exception2.source_encoding.should == Encoding::EUC_JP + end +end diff --git a/spec/ruby/core/encoding/list_spec.rb b/spec/ruby/core/encoding/list_spec.rb new file mode 100644 index 0000000000..bd3d5b7bc0 --- /dev/null +++ b/spec/ruby/core/encoding/list_spec.rb @@ -0,0 +1,49 @@ +require_relative '../../spec_helper' + +describe "Encoding.list" do + it "returns an Array" do + Encoding.list.should be_an_instance_of(Array) + end + + it "returns an Array of Encoding objects" do + Encoding.list.each do |enc| + enc.should be_an_instance_of(Encoding) + end + end + + it "returns each encoding only once" do + orig = Encoding.list.map { |e| e.name } + orig.should == orig.uniq + end + + it "includes the default external encoding" do + Encoding.list.include?(Encoding.default_external).should be_true + end + + it "does not include any alias names" do + Encoding.aliases.keys.each do |enc_alias| + Encoding.list.include?(enc_alias).should be_false + end + end + + it "includes all aliased encodings" do + Encoding.aliases.values.each do |enc_alias| + Encoding.list.include?(Encoding.find(enc_alias)).should be_true + end + end + + it "includes dummy encodings" do + Encoding.list.select { |e| e.dummy? 
}.should_not == [] + end + + it 'includes UTF-8 encoding' do + Encoding.list.should.include?(Encoding::UTF_8) + end + + it 'includes CESU-8 encoding' do + Encoding.list.should.include?(Encoding::CESU_8) + end + + # TODO: Find example that illustrates this + it "updates the list when #find is used to load a new encoding" +end diff --git a/spec/ruby/core/encoding/locale_charmap_spec.rb b/spec/ruby/core/encoding/locale_charmap_spec.rb new file mode 100644 index 0000000000..8143b9083a --- /dev/null +++ b/spec/ruby/core/encoding/locale_charmap_spec.rb @@ -0,0 +1,56 @@ +require_relative '../../spec_helper' + +describe "Encoding.locale_charmap" do + it "returns a String" do + Encoding.locale_charmap.should be_an_instance_of(String) + end + + # FIXME: Get this working on Windows + platform_is :linux do + platform_is_not :android do + it "returns a value based on the LC_ALL environment variable" do + old_lc_all = ENV['LC_ALL'] + ENV['LC_ALL'] = 'C' + ruby_exe("print Encoding.locale_charmap").should == 'ANSI_X3.4-1968' + ENV['LC_ALL'] = old_lc_all + end + end + end + + platform_is :freebsd, :openbsd, :darwin do + it "returns a value based on the LC_ALL environment variable" do + old_lc_all = ENV['LC_ALL'] + ENV['LC_ALL'] = 'C' + ruby_exe("print Encoding.locale_charmap").should == 'US-ASCII' + ENV['LC_ALL'] = old_lc_all + end + end + + platform_is :netbsd do + it "returns a value based on the LC_ALL environment variable" do + old_lc_all = ENV['LC_ALL'] + ENV['LC_ALL'] = 'C' + ruby_exe("print Encoding.locale_charmap").should == '646' + ENV['LC_ALL'] = old_lc_all + end + end + + platform_is :android do + it "always returns UTF-8" do + old_lc_all = ENV['LC_ALL'] + ENV['LC_ALL'] = 'C' + ruby_exe("print Encoding.locale_charmap").should == 'UTF-8' + ENV['LC_ALL'] = old_lc_all + end + end + + platform_is :bsd, :darwin, :linux do + it "is unaffected by assigning to ENV['LC_ALL'] in the same process" do + old_charmap = Encoding.locale_charmap + old_lc_all = ENV['LC_ALL'] + 
ENV['LC_ALL'] = 'C' + Encoding.locale_charmap.should == old_charmap + ENV['LC_ALL'] = old_lc_all + end + end +end diff --git a/spec/ruby/core/encoding/name_list_spec.rb b/spec/ruby/core/encoding/name_list_spec.rb new file mode 100644 index 0000000000..836381c4d8 --- /dev/null +++ b/spec/ruby/core/encoding/name_list_spec.rb @@ -0,0 +1,23 @@ +require_relative '../../spec_helper' + +describe "Encoding.name_list" do + it "returns an Array" do + Encoding.name_list.should be_an_instance_of(Array) + end + + it "returns encoding names as Strings" do + Encoding.name_list.each {|e| e.should be_an_instance_of(String) } + end + + it "includes all aliases" do + Encoding.aliases.keys.each do |enc_alias| + Encoding.name_list.include?(enc_alias).should be_true + end + end + + it "includes all non-dummy encodings" do + Encoding.list.each do |enc| + Encoding.name_list.include?(enc.name).should be_true + end + end +end diff --git a/spec/ruby/core/encoding/name_spec.rb b/spec/ruby/core/encoding/name_spec.rb new file mode 100644 index 0000000000..dce9347978 --- /dev/null +++ b/spec/ruby/core/encoding/name_spec.rb @@ -0,0 +1,6 @@ +require_relative "../../spec_helper" +require_relative 'shared/name' + +describe "Encoding#name" do + it_behaves_like :encoding_name, :name +end diff --git a/spec/ruby/core/encoding/names_spec.rb b/spec/ruby/core/encoding/names_spec.rb new file mode 100644 index 0000000000..9ded043bbb --- /dev/null +++ b/spec/ruby/core/encoding/names_spec.rb @@ -0,0 +1,35 @@ +require_relative '../../spec_helper' + +describe "Encoding#names" do + it "returns an Array" do + Encoding.name_list.each do |name| + e = Encoding.find(name) or next + e.names.should be_an_instance_of(Array) + end + end + + it "returns names as Strings" do + Encoding.name_list.each do |name| + e = Encoding.find(name) or next + e.names.each do |this_name| + this_name.should be_an_instance_of(String) + end + end + end + + it "returns #name as the first value" do + Encoding.name_list.each do |name| + e = 
Encoding.find(name) or next + e.names.first.should == e.name + end + end + + it "includes any aliases the encoding has" do + Encoding.name_list.each do |name| + e = Encoding.find(name) or next + aliases = Encoding.aliases.select{|a,n| n == name}.keys + names = e.names + aliases.each {|a| names.include?(a).should be_true} + end + end +end diff --git a/spec/ruby/core/encoding/replicate_spec.rb b/spec/ruby/core/encoding/replicate_spec.rb new file mode 100644 index 0000000000..2da998837f --- /dev/null +++ b/spec/ruby/core/encoding/replicate_spec.rb @@ -0,0 +1,88 @@ +# encoding: binary +require_relative '../../spec_helper' + +describe "Encoding#replicate" do + ruby_version_is ""..."3.3" do + before :all do + @i = 0 + end + + before :each do + @i += 1 + @prefix = "RS#{@i}" + end + + it "returns a replica of ASCII" do + name = @prefix + '-ASCII' + e = suppress_warning { Encoding::ASCII.replicate(name) } + e.name.should == name + Encoding.find(name).should == e + + "a".dup.force_encoding(e).valid_encoding?.should be_true + "\x80".dup.force_encoding(e).valid_encoding?.should be_false + end + + it "returns a replica of UTF-8" do + name = @prefix + 'UTF-8' + e = suppress_warning { Encoding::UTF_8.replicate(name) } + e.name.should == name + Encoding.find(name).should == e + + "a".dup.force_encoding(e).valid_encoding?.should be_true + "\u3042".dup.force_encoding(e).valid_encoding?.should be_true + "\x80".dup.force_encoding(e).valid_encoding?.should be_false + end + + it "returns a replica of UTF-16BE" do + name = @prefix + 'UTF-16-BE' + e = suppress_warning { Encoding::UTF_16BE.replicate(name) } + e.name.should == name + Encoding.find(name).should == e + + "a".dup.force_encoding(e).valid_encoding?.should be_false + "\x30\x42".dup.force_encoding(e).valid_encoding?.should be_true + "\x80".dup.force_encoding(e).valid_encoding?.should be_false + end + + it "returns a replica of ISO-2022-JP" do + name = @prefix + 'ISO-2022-JP' + e = suppress_warning { 
Encoding::ISO_2022_JP.replicate(name) } + Encoding.find(name).should == e + + e.name.should == name + e.dummy?.should be_true + end + + # NOTE: it's unclear of the value of this (for the complexity cost of it), + # but it is the current CRuby behavior. + it "can be associated with a String" do + name = @prefix + '-US-ASCII' + e = suppress_warning { Encoding::US_ASCII.replicate(name) } + e.name.should == name + Encoding.find(name).should == e + + s = "abc".dup.force_encoding(e) + s.encoding.should == e + s.encoding.name.should == name + end + end + + ruby_version_is ""..."3.3" do + it "warns about deprecation" do + -> { + Encoding::US_ASCII.replicate('MY-US-ASCII') + }.should complain(/warning: Encoding#replicate is deprecated and will be removed in Ruby 3.3; use the original encoding instead/) + end + + it "raises EncodingError if too many encodings" do + code = '1_000.times {|i| Encoding::US_ASCII.replicate("R_#{i}") }' + ruby_exe(code, args: "2>&1", exit_status: 1).should.include?('too many encoding (> 256) (EncodingError)') + end + end + + ruby_version_is "3.3" do + it "has been removed" do + Encoding::US_ASCII.should_not.respond_to?(:replicate, true) + end + end +end diff --git a/spec/ruby/core/encoding/shared/name.rb b/spec/ruby/core/encoding/shared/name.rb new file mode 100644 index 0000000000..cd37ea06db --- /dev/null +++ b/spec/ruby/core/encoding/shared/name.rb @@ -0,0 +1,15 @@ +require_relative '../../../spec_helper' + +describe :encoding_name, shared: true do + it "returns a String" do + Encoding.list.each do |e| + e.send(@method).should be_an_instance_of(String) + end + end + + it "uniquely identifies an encoding" do + Encoding.list.each do |e| + e.should == Encoding.find(e.send(@method)) + end + end +end diff --git a/spec/ruby/core/encoding/to_s_spec.rb b/spec/ruby/core/encoding/to_s_spec.rb new file mode 100644 index 0000000000..bab394a888 --- /dev/null +++ b/spec/ruby/core/encoding/to_s_spec.rb @@ -0,0 +1,6 @@ +require_relative "../../spec_helper" 
+require_relative 'shared/name' + +describe "Encoding#to_s" do + it_behaves_like :encoding_name, :to_s +end diff --git a/spec/ruby/core/encoding/undefined_conversion_error/destination_encoding_name_spec.rb b/spec/ruby/core/encoding/undefined_conversion_error/destination_encoding_name_spec.rb new file mode 100644 index 0000000000..a51a9f46a0 --- /dev/null +++ b/spec/ruby/core/encoding/undefined_conversion_error/destination_encoding_name_spec.rb @@ -0,0 +1,16 @@ +require_relative "../../../spec_helper" +require_relative '../fixtures/classes' + +describe "Encoding::UndefinedConversionError#destination_encoding_name" do + before :each do + @exception = EncodingSpecs::UndefinedConversionError.exception + end + + it "returns a String" do + @exception.destination_encoding_name.should be_an_instance_of(String) + end + + it "is equal to the destination encoding name of the object that raised it" do + @exception.destination_encoding_name.should == "US-ASCII" + end +end diff --git a/spec/ruby/core/encoding/undefined_conversion_error/destination_encoding_spec.rb b/spec/ruby/core/encoding/undefined_conversion_error/destination_encoding_spec.rb new file mode 100644 index 0000000000..905556407c --- /dev/null +++ b/spec/ruby/core/encoding/undefined_conversion_error/destination_encoding_spec.rb @@ -0,0 +1,16 @@ +require_relative "../../../spec_helper" +require_relative '../fixtures/classes' + +describe "Encoding::UndefinedConversionError#destination_encoding" do + before :each do + @exception = EncodingSpecs::UndefinedConversionError.exception + end + + it "returns an Encoding object" do + @exception.destination_encoding.should be_an_instance_of(Encoding) + end + + it "is equal to the destination encoding of the object that raised it" do + @exception.destination_encoding.should == Encoding::US_ASCII + end +end diff --git a/spec/ruby/core/encoding/undefined_conversion_error/error_char_spec.rb b/spec/ruby/core/encoding/undefined_conversion_error/error_char_spec.rb new file mode 
100644 index 0000000000..9cb55e6d95 --- /dev/null +++ b/spec/ruby/core/encoding/undefined_conversion_error/error_char_spec.rb @@ -0,0 +1,28 @@ +require_relative "../../../spec_helper" +require_relative '../fixtures/classes' + +describe "Encoding::UndefinedConversionError#error_char" do + before :each do + @exception = EncodingSpecs::UndefinedConversionError.exception + @exception2 = EncodingSpecs::UndefinedConversionErrorIndirect.exception + end + + it "returns a String" do + @exception.error_char.should be_an_instance_of(String) + @exception2.error_char.should be_an_instance_of(String) + end + + it "returns the one-character String that caused the exception" do + @exception.error_char.size.should == 1 + @exception.error_char.should == "\u{8765}" + + @exception2.error_char.size.should == 1 + @exception2.error_char.should == "\u{A0}" + end + + it "uses the source encoding" do + @exception.error_char.encoding.should == @exception.source_encoding + + @exception2.error_char.encoding.should == @exception2.source_encoding + end +end diff --git a/spec/ruby/core/encoding/undefined_conversion_error/source_encoding_name_spec.rb b/spec/ruby/core/encoding/undefined_conversion_error/source_encoding_name_spec.rb new file mode 100644 index 0000000000..d5e60e78db --- /dev/null +++ b/spec/ruby/core/encoding/undefined_conversion_error/source_encoding_name_spec.rb @@ -0,0 +1,29 @@ +require_relative "../../../spec_helper" +require_relative '../fixtures/classes' + +describe "Encoding::UndefinedConversionError#source_encoding_name" do + before :each do + @exception = EncodingSpecs::UndefinedConversionError.exception + @exception2 = EncodingSpecs::UndefinedConversionErrorIndirect.exception + end + + it "returns a String" do + @exception.source_encoding_name.should be_an_instance_of(String) + end + + it "is equal to the source encoding name of the object that raised it" do + @exception.source_encoding_name.should == "UTF-8" + end + + # The source encoding specified in the 
Encoding::Converter constructor may + # differ from the source encoding returned here. What seems to happen is + # that when transcoding along a path with multiple pairs of encodings, the + # last one encountered when the error occurred is returned. So in this + # case, the conversion path is ISO-8859-1 -> UTF-8 -> EUC-JP. The + # conversion from ISO-8859-1 -> UTF-8 succeeded, but the conversion from + # UTF-8 to EUC-JP failed. IOW, it failed when the source encoding was + # UTF-8, so UTF-8 is regarded as the source encoding. + it "is equal to the source encoding at the stage of the conversion path where the error occurred" do + @exception2.source_encoding_name.should == 'UTF-8' + end +end diff --git a/spec/ruby/core/encoding/undefined_conversion_error/source_encoding_spec.rb b/spec/ruby/core/encoding/undefined_conversion_error/source_encoding_spec.rb new file mode 100644 index 0000000000..de456a4b5a --- /dev/null +++ b/spec/ruby/core/encoding/undefined_conversion_error/source_encoding_spec.rb @@ -0,0 +1,30 @@ +require_relative "../../../spec_helper" +require_relative '../fixtures/classes' + +describe "Encoding::UndefinedConversionError#source_encoding" do + before :each do + @exception = EncodingSpecs::UndefinedConversionError.exception + @exception2 = EncodingSpecs::UndefinedConversionErrorIndirect.exception + end + + it "returns an Encoding object" do + @exception.source_encoding.should be_an_instance_of(Encoding) + @exception2.source_encoding.should be_an_instance_of(Encoding) + end + + it "is equal to the source encoding of the object that raised it" do + @exception.source_encoding.should == Encoding::UTF_8 + end + + # The source encoding specified in the Encoding::Converter constructor may + # differ from the source encoding returned here. What seems to happen is + # that when transcoding along a path with multiple pairs of encodings, the + # last one encountered when the error occurred is returned. 
So in this + # case, the conversion path is ISO-8859-1 -> UTF-8 -> EUC-JP. The + # conversion from ISO-8859-1 -> UTF-8 succeeded, but the conversion from + # UTF-8 to EUC-JP failed. IOW, it failed when the source encoding was + # UTF-8, so UTF-8 is regarded as the source encoding. + it "is equal to the source encoding at the stage of the conversion path where the error occurred" do + @exception2.source_encoding.should == Encoding::UTF_8 + end +end |
