summaryrefslogtreecommitdiff
path: root/spec/ruby/core/string/encode_spec.rb
diff options
context:
space:
mode:
Diffstat (limited to 'spec/ruby/core/string/encode_spec.rb')
-rw-r--r--spec/ruby/core/string/encode_spec.rb76
1 files changed, 60 insertions, 16 deletions
diff --git a/spec/ruby/core/string/encode_spec.rb b/spec/ruby/core/string/encode_spec.rb
index ae641b2110..97dd753b62 100644
--- a/spec/ruby/core/string/encode_spec.rb
+++ b/spec/ruby/core/string/encode_spec.rb
@@ -34,8 +34,8 @@ describe "String#encode" do
it "encodes an ascii substring of a binary string to UTF-8" do
x82 = [0x82].pack('C')
- str = "#{x82}foo".force_encoding("binary")[1..-1].encode("utf-8")
- str.should == "foo".force_encoding("utf-8")
+ str = "#{x82}foo".dup.force_encoding("binary")[1..-1].encode("utf-8")
+ str.should == "foo".dup.force_encoding("utf-8")
str.encoding.should equal(Encoding::UTF_8)
end
end
@@ -49,7 +49,7 @@ describe "String#encode" do
end
it "round trips a String" do
- str = "abc def".force_encoding Encoding::US_ASCII
+ str = "abc def".dup.force_encoding Encoding::US_ASCII
str.encode("utf-32be").encode("ascii").should == "abc def"
end
end
@@ -67,17 +67,62 @@ describe "String#encode" do
"\rfoo".encode(universal_newline: true).should == "\nfoo"
end
- it "replaces invalid encoding" do
- encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "?")
- encoded.should == "\u3061??".encode("UTF-16LE")
- encoded.encode("UTF-8").should == "ち??"
+ it "replaces invalid encoding in source with default replacement" do
+ encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace)
+ encoded.should == "\u3061\ufffd\ufffd".encode("UTF-16LE")
+ encoded.encode("UTF-8").should == "ち\ufffd\ufffd"
+ end
+
+ it "replaces invalid encoding in source with a specified replacement" do
+ encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "foo")
+ encoded.should == "\u3061foofoo".encode("UTF-16LE")
+ encoded.encode("UTF-8").should == "ちfoofoo"
+ end
+
+ it "replace multiple invalid bytes at the end with a single replacement character" do
+ "\xE3\x81\x93\xE3\x81".encode("UTF-8", invalid: :replace).should == "\u3053\ufffd"
+ end
+
+ it "replaces invalid encoding in source using a specified replacement even when a fallback is given" do
+ encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "foo", fallback: -> c { "bar" })
+ encoded.should == "\u3061foofoo".encode("UTF-16LE")
+ encoded.encode("UTF-8").should == "ちfoofoo"
+ end
+
+ it "replaces undefined encoding in destination with default replacement" do
+ encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace)
+ encoded.should == "B?".encode(Encoding::US_ASCII)
+ encoded.encode("UTF-8").should == "B?"
+ end
+
+ it "replaces undefined encoding in destination with a specified replacement" do
+ encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace, replace: "foo")
+ encoded.should == "Bfoo".encode(Encoding::US_ASCII)
+ encoded.encode("UTF-8").should == "Bfoo"
+ end
+
+ it "replaces undefined encoding in destination with a specified replacement even if a fallback is given" do
+ encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace, replace: "foo", fallback: proc {|x| "bar"})
+ encoded.should == "Bfoo".encode(Encoding::US_ASCII)
+ encoded.encode("UTF-8").should == "Bfoo"
+ end
+
+ it "replaces undefined encoding in destination using a fallback proc" do
+ encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: proc {|x| "bar"})
+ encoded.should == "Bbar".encode(Encoding::US_ASCII)
+ encoded.encode("UTF-8").should == "Bbar"
+ end
+
+ it "replaces invalid encoding in source using replace even when fallback is given as proc" do
+ encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "foo", fallback: proc {|x| "bar"})
+ encoded.should == "\u3061foofoo".encode("UTF-16LE")
+ encoded.encode("UTF-8").should == "ちfoofoo"
end
end
describe "when passed to, from" do
it "returns a copy in the destination encoding when both encodings are the same" do
- str = "あ"
- str.force_encoding("binary")
+ str = "あ".dup.force_encoding("binary")
encoded = str.encode("utf-8", "utf-8")
encoded.should_not equal(str)
@@ -109,8 +154,7 @@ describe "String#encode" do
end
it "returns a copy in the destination encoding when both encodings are the same" do
- str = "あ"
- str.force_encoding("binary")
+ str = "あ".dup.force_encoding("binary")
encoded = str.encode("utf-8", "utf-8", invalid: :replace)
encoded.should_not equal(str)
@@ -145,13 +189,13 @@ describe "String#encode!" do
describe "when passed no options" do
it "returns self when Encoding.default_internal is nil" do
Encoding.default_internal = nil
- str = "あ"
+ str = +"あ"
str.encode!.should equal(str)
end
it "returns self for a ASCII-only String when Encoding.default_internal is nil" do
Encoding.default_internal = nil
- str = "abc"
+ str = +"abc"
str.encode!.should equal(str)
end
end
@@ -159,14 +203,14 @@ describe "String#encode!" do
describe "when passed options" do
it "returns self for ASCII-only String when Encoding.default_internal is nil" do
Encoding.default_internal = nil
- str = "abc"
+ str = +"abc"
str.encode!(invalid: :replace).should equal(str)
end
end
describe "when passed to encoding" do
it "returns self" do
- str = "abc"
+ str = +"abc"
result = str.encode!(Encoding::BINARY)
result.encoding.should equal(Encoding::BINARY)
result.should equal(str)
@@ -175,7 +219,7 @@ describe "String#encode!" do
describe "when passed to, from" do
it "returns self" do
- str = "ああ"
+ str = +"ああ"
result = str.encode!("euc-jp", "utf-8")
result.encoding.should equal(Encoding::EUC_JP)
result.should equal(str)