diff options
Diffstat (limited to 'spec/ruby/core/string/encode_spec.rb')
-rw-r--r-- | spec/ruby/core/string/encode_spec.rb | 76 |
1 files changed, 60 insertions, 16 deletions
diff --git a/spec/ruby/core/string/encode_spec.rb b/spec/ruby/core/string/encode_spec.rb index ae641b2110..97dd753b62 100644 --- a/spec/ruby/core/string/encode_spec.rb +++ b/spec/ruby/core/string/encode_spec.rb @@ -34,8 +34,8 @@ describe "String#encode" do it "encodes an ascii substring of a binary string to UTF-8" do x82 = [0x82].pack('C') - str = "#{x82}foo".force_encoding("binary")[1..-1].encode("utf-8") - str.should == "foo".force_encoding("utf-8") + str = "#{x82}foo".dup.force_encoding("binary")[1..-1].encode("utf-8") + str.should == "foo".dup.force_encoding("utf-8") str.encoding.should equal(Encoding::UTF_8) end end @@ -49,7 +49,7 @@ describe "String#encode" do end it "round trips a String" do - str = "abc def".force_encoding Encoding::US_ASCII + str = "abc def".dup.force_encoding Encoding::US_ASCII str.encode("utf-32be").encode("ascii").should == "abc def" end end @@ -67,17 +67,62 @@ describe "String#encode" do "\rfoo".encode(universal_newline: true).should == "\nfoo" end - it "replaces invalid encoding" do - encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "?") - encoded.should == "\u3061??".encode("UTF-16LE") - encoded.encode("UTF-8").should == "ち??" + it "replaces invalid encoding in source with default replacement" do + encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace) + encoded.should == "\u3061\ufffd\ufffd".encode("UTF-16LE") + encoded.encode("UTF-8").should == "ち\ufffd\ufffd" + end + + it "replaces invalid encoding in source with a specified replacement" do + encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "foo") + encoded.should == "\u3061foofoo".encode("UTF-16LE") + encoded.encode("UTF-8").should == "ちfoofoo" + end + + it "replace multiple invalid bytes at the end with a single replacement character" do + "\xE3\x81\x93\xE3\x81".encode("UTF-8", invalid: :replace).should == "\u3053\ufffd" + end + + it "replaces invalid encoding in source using a specified replacement even when a fallback is given" do + encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "foo", fallback: -> c { "bar" }) + encoded.should == "\u3061foofoo".encode("UTF-16LE") + encoded.encode("UTF-8").should == "ちfoofoo" + end + + it "replaces undefined encoding in destination with default replacement" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace) + encoded.should == "B?".encode(Encoding::US_ASCII) + encoded.encode("UTF-8").should == "B?" + end + + it "replaces undefined encoding in destination with a specified replacement" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace, replace: "foo") + encoded.should == "Bfoo".encode(Encoding::US_ASCII) + encoded.encode("UTF-8").should == "Bfoo" + end + + it "replaces undefined encoding in destination with a specified replacement even if a fallback is given" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace, replace: "foo", fallback: proc {|x| "bar"}) + encoded.should == "Bfoo".encode(Encoding::US_ASCII) + encoded.encode("UTF-8").should == "Bfoo" + end + + it "replaces undefined encoding in destination using a fallback proc" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: proc {|x| "bar"}) + encoded.should == "Bbar".encode(Encoding::US_ASCII) + encoded.encode("UTF-8").should == "Bbar" + end + + it "replaces invalid encoding in source using replace even when fallback is given as proc" do + encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "foo", fallback: proc {|x| "bar"}) + encoded.should == "\u3061foofoo".encode("UTF-16LE") + encoded.encode("UTF-8").should == "ちfoofoo" end end describe "when passed to, from" do it "returns a copy in the destination encoding when both encodings are the same" do - str = "あ" - str.force_encoding("binary") + str = "あ".dup.force_encoding("binary") encoded = str.encode("utf-8", "utf-8") encoded.should_not equal(str) @@ -109,8 +154,7 @@ describe "String#encode" do end it "returns a copy in the destination encoding when both encodings are the same" do - str = "あ" - str.force_encoding("binary") + str = "あ".dup.force_encoding("binary") encoded = str.encode("utf-8", "utf-8", invalid: :replace) encoded.should_not equal(str) @@ -145,13 +189,13 @@ describe "String#encode!" do describe "when passed no options" do it "returns self when Encoding.default_internal is nil" do Encoding.default_internal = nil - str = "あ" + str = +"あ" str.encode!.should equal(str) end it "returns self for a ASCII-only String when Encoding.default_internal is nil" do Encoding.default_internal = nil - str = "abc" + str = +"abc" str.encode!.should equal(str) end end @@ -159,14 +203,14 @@ describe "String#encode!" do describe "when passed options" do it "returns self for ASCII-only String when Encoding.default_internal is nil" do Encoding.default_internal = nil - str = "abc" + str = +"abc" str.encode!(invalid: :replace).should equal(str) end end describe "when passed to encoding" do it "returns self" do - str = "abc" + str = +"abc" result = str.encode!(Encoding::BINARY) result.encoding.should equal(Encoding::BINARY) result.should equal(str) @@ -175,7 +219,7 @@ describe "String#encode!" do describe "when passed to, from" do it "returns self" do - str = "ああ" + str = +"ああ" result = str.encode!("euc-jp", "utf-8") result.encoding.should equal(Encoding::EUC_JP) result.should equal(str) |