diff options
Diffstat (limited to 'spec/ruby/core/string')
119 files changed, 2994 insertions, 1077 deletions
diff --git a/spec/ruby/core/string/append_as_bytes_spec.rb b/spec/ruby/core/string/append_as_bytes_spec.rb new file mode 100644 index 0000000000..b1703e5f89 --- /dev/null +++ b/spec/ruby/core/string/append_as_bytes_spec.rb @@ -0,0 +1,58 @@ +require_relative '../../spec_helper' + +describe "String#append_bytes" do + ruby_version_is "3.4" do + it "doesn't allow to mutate frozen strings" do + str = "hello".freeze + -> { str.append_as_bytes("\xE2\x82") }.should raise_error(FrozenError) + end + + it "allows creating broken strings" do + str = +"hello" + str.append_as_bytes("\xE2\x82") + str.valid_encoding?.should == false + + str.append_as_bytes("\xAC") + str.valid_encoding?.should == true + + str = "abc".encode(Encoding::UTF_32LE) + str.append_as_bytes("def") + str.encoding.should == Encoding::UTF_32LE + str.valid_encoding?.should == false + end + + it "never changes the receiver encoding" do + str = "".b + str.append_as_bytes("€") + str.encoding.should == Encoding::BINARY + end + + it "accepts variadic String or Integer arguments" do + str = "hello".b + str.append_as_bytes("\xE2\x82", 12, 43, "\xAC") + str.encoding.should == Encoding::BINARY + str.should == "hello\xE2\x82\f+\xAC".b + end + + it "truncates integers to the least significant byte" do + str = +"" + str.append_as_bytes(0x131, 0x232, 0x333, bignum_value, bignum_value(1)) + str.bytes.should == [0x31, 0x32, 0x33, 0, 1] + end + + it "wraps negative integers" do + str = "".b + str.append_as_bytes(-1, -bignum_value, -bignum_value(1)) + str.bytes.should == [0xFF, 0, 0xFF] + end + + it "only accepts strings or integers, and doesn't attempt to cast with #to_str or #to_int" do + to_str = mock("to_str") + to_str.should_not_receive(:to_str) + to_str.should_not_receive(:to_int) + + str = +"hello" + -> { str.append_as_bytes(to_str) }.should raise_error(TypeError, "wrong argument type MockObject (expected String or Integer)") + end + end +end diff --git a/spec/ruby/core/string/append_spec.rb b/spec/ruby/core/string/append_spec.rb index e001257621..8497ce8262 100644 --- a/spec/ruby/core/string/append_spec.rb +++ b/spec/ruby/core/string/append_spec.rb @@ -5,6 +5,7 @@ require_relative 'shared/concat' describe "String#<<" do it_behaves_like :string_concat, :<< it_behaves_like :string_concat_encoding, :<< + it_behaves_like :string_concat_type_coercion, :<< it "raises an ArgumentError when given the incorrect number of arguments" do -> { "hello".send(:<<) }.should raise_error(ArgumentError) diff --git a/spec/ruby/core/string/ascii_only_spec.rb b/spec/ruby/core/string/ascii_only_spec.rb index c7e02fd874..88a0559cfd 100644 --- a/spec/ruby/core/string/ascii_only_spec.rb +++ b/spec/ruby/core/string/ascii_only_spec.rb @@ -7,12 +7,12 @@ describe "String#ascii_only?" do it "returns true if the encoding is UTF-8" do [ ["hello", true], ["hello".encode('UTF-8'), true], - ["hello".force_encoding('UTF-8'), true], + ["hello".dup.force_encoding('UTF-8'), true], ].should be_computed_by(:ascii_only?) end it "returns true if the encoding is US-ASCII" do - "hello".force_encoding(Encoding::US_ASCII).ascii_only?.should be_true + "hello".dup.force_encoding(Encoding::US_ASCII).ascii_only?.should be_true "hello".encode(Encoding::US_ASCII).ascii_only?.should be_true end @@ -34,13 +34,13 @@ describe "String#ascii_only?" do [ ["\u{6666}", false], ["hello, \u{6666}", false], ["\u{6666}".encode('UTF-8'), false], - ["\u{6666}".force_encoding('UTF-8'), false], + ["\u{6666}".dup.force_encoding('UTF-8'), false], ].should be_computed_by(:ascii_only?) end it "returns false if the encoding is US-ASCII" do - [ ["\u{6666}".force_encoding(Encoding::US_ASCII), false], - ["hello, \u{6666}".force_encoding(Encoding::US_ASCII), false], + [ ["\u{6666}".dup.force_encoding(Encoding::US_ASCII), false], + ["hello, \u{6666}".dup.force_encoding(Encoding::US_ASCII), false], ].should be_computed_by(:ascii_only?) end end @@ -51,17 +51,16 @@ describe "String#ascii_only?" do end it "returns false for the empty String with a non-ASCII-compatible encoding" do - "".force_encoding('UTF-16LE').ascii_only?.should be_false + "".dup.force_encoding('UTF-16LE').ascii_only?.should be_false "".encode('UTF-16BE').ascii_only?.should be_false end it "returns false for a non-empty String with non-ASCII-compatible encoding" do - "\x78\x00".force_encoding("UTF-16LE").ascii_only?.should be_false + "\x78\x00".dup.force_encoding("UTF-16LE").ascii_only?.should be_false end it "returns false when interpolating non ascii strings" do - base = "EU currency is" - base.force_encoding(Encoding::US_ASCII) + base = "EU currency is".dup.force_encoding(Encoding::US_ASCII) euro = "\u20AC" interp = "#{base} #{euro}" euro.ascii_only?.should be_false @@ -70,14 +69,14 @@ describe "String#ascii_only?" do end it "returns false after appending non ASCII characters to an empty String" do - ("" << "λ").ascii_only?.should be_false + ("".dup << "λ").ascii_only?.should be_false end it "returns false when concatenating an ASCII and non-ASCII String" do - "".concat("λ").ascii_only?.should be_false + "".dup.concat("λ").ascii_only?.should be_false end it "returns false when replacing an ASCII String with a non-ASCII String" do - "".replace("λ").ascii_only?.should be_false + "".dup.replace("λ").ascii_only?.should be_false end end diff --git a/spec/ruby/core/string/b_spec.rb b/spec/ruby/core/string/b_spec.rb index 37c7994700..4b1fafff11 100644 --- a/spec/ruby/core/string/b_spec.rb +++ b/spec/ruby/core/string/b_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' describe "String#b" do diff --git a/spec/ruby/core/string/byteindex_spec.rb b/spec/ruby/core/string/byteindex_spec.rb new file mode 100644 index 0000000000..d420f3f683 --- /dev/null +++ b/spec/ruby/core/string/byteindex_spec.rb @@ -0,0 +1,298 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/byte_index_common.rb' + +describe "String#byteindex" do + it "calls #to_str to convert the first argument" do + char = mock("string index char") + char.should_receive(:to_str).and_return("b") + "abc".byteindex(char).should == 1 + end + + it "calls #to_int to convert the second argument" do + offset = mock("string index offset") + offset.should_receive(:to_int).and_return(1) + "abc".byteindex("c", offset).should == 2 + end + + it "does not raise IndexError when byte offset is correct or on string boundary" do + "わ".byteindex("").should == 0 + "わ".byteindex("", 0).should == 0 + "わ".byteindex("", 3).should == 3 + end + + it_behaves_like :byte_index_common, :byteindex +end + +describe "String#byteindex with String" do + it "behaves the same as String#byteindex(char) for one-character strings" do + "blablabla hello cruel world...!".split("").uniq.each do |str| + chr = str[0] + str.byteindex(str).should == str.byteindex(chr) + + 0.upto(str.size + 1) do |start| + str.byteindex(str, start).should == str.byteindex(chr, start) + end + + (-str.size - 1).upto(-1) do |start| + str.byteindex(str, start).should == str.byteindex(chr, start) + end + end + end + + it "returns the byteindex of the first occurrence of the given substring" do + "blablabla".byteindex("").should == 0 + "blablabla".byteindex("b").should == 0 + "blablabla".byteindex("bla").should == 0 + "blablabla".byteindex("blabla").should == 0 + "blablabla".byteindex("blablabla").should == 0 + + "blablabla".byteindex("l").should == 1 + "blablabla".byteindex("la").should == 1 + "blablabla".byteindex("labla").should == 1 + "blablabla".byteindex("lablabla").should == 1 + + "blablabla".byteindex("a").should == 2 + "blablabla".byteindex("abla").should == 2 + "blablabla".byteindex("ablabla").should == 2 + end + + it "treats the offset as a byteindex" do + "aaaaa".byteindex("a", 0).should == 0 + "aaaaa".byteindex("a", 2).should == 2 + "aaaaa".byteindex("a", 4).should == 4 + end + + it "ignores string subclasses" do + "blablabla".byteindex(StringSpecs::MyString.new("bla")).should == 0 + StringSpecs::MyString.new("blablabla").byteindex("bla").should == 0 + StringSpecs::MyString.new("blablabla").byteindex(StringSpecs::MyString.new("bla")).should == 0 + end + + it "starts the search at the given offset" do + "blablabla".byteindex("bl", 0).should == 0 + "blablabla".byteindex("bl", 1).should == 3 + "blablabla".byteindex("bl", 2).should == 3 + "blablabla".byteindex("bl", 3).should == 3 + + "blablabla".byteindex("bla", 0).should == 0 + "blablabla".byteindex("bla", 1).should == 3 + "blablabla".byteindex("bla", 2).should == 3 + "blablabla".byteindex("bla", 3).should == 3 + + "blablabla".byteindex("blab", 0).should == 0 + "blablabla".byteindex("blab", 1).should == 3 + "blablabla".byteindex("blab", 2).should == 3 + "blablabla".byteindex("blab", 3).should == 3 + + "blablabla".byteindex("la", 1).should == 1 + "blablabla".byteindex("la", 2).should == 4 + "blablabla".byteindex("la", 3).should == 4 + "blablabla".byteindex("la", 4).should == 4 + + "blablabla".byteindex("lab", 1).should == 1 + "blablabla".byteindex("lab", 2).should == 4 + "blablabla".byteindex("lab", 3).should == 4 + "blablabla".byteindex("lab", 4).should == 4 + + "blablabla".byteindex("ab", 2).should == 2 + "blablabla".byteindex("ab", 3).should == 5 + "blablabla".byteindex("ab", 4).should == 5 + "blablabla".byteindex("ab", 5).should == 5 + + "blablabla".byteindex("", 0).should == 0 + "blablabla".byteindex("", 1).should == 1 + "blablabla".byteindex("", 2).should == 2 + "blablabla".byteindex("", 7).should == 7 + "blablabla".byteindex("", 8).should == 8 + "blablabla".byteindex("", 9).should == 9 + end + + it "starts the search at offset + self.length if offset is negative" do + str = "blablabla" + + ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle| + (-str.length .. -1).each do |offset| + str.byteindex(needle, offset).should == + str.byteindex(needle, offset + str.length) + end + end + end + + it "returns nil if the substring isn't found" do + "blablabla".byteindex("B").should == nil + "blablabla".byteindex("z").should == nil + "blablabla".byteindex("BLA").should == nil + "blablabla".byteindex("blablablabla").should == nil + "blablabla".byteindex("", 10).should == nil + + "hello".byteindex("he", 1).should == nil + "hello".byteindex("he", 2).should == nil + "I’ve got a multibyte character.\n".byteindex("\n\n").should == nil + end + + it "returns the character byteindex of a multibyte character" do + "ありがとう".byteindex("が").should == 6 + end + + it "returns the character byteindex after offset" do + "われわれ".byteindex("わ", 3).should == 6 + "ありがとうありがとう".byteindex("が", 9).should == 21 + end + + it "returns the character byteindex after a partial first match" do + "</</h".byteindex("</h").should == 2 + end + + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + char = "れ".encode Encoding::EUC_JP + -> do + "あれ".byteindex(char) + end.should raise_error(Encoding::CompatibilityError) + end + + it "handles a substring in a superset encoding" do + 'abc'.dup.force_encoding(Encoding::US_ASCII).byteindex('é').should == nil + end + + it "handles a substring in a subset encoding" do + 'été'.byteindex('t'.dup.force_encoding(Encoding::US_ASCII)).should == 2 + end +end + +describe "String#byteindex with Regexp" do + it "behaves the same as String#byteindex(string) for escaped string regexps" do + ["blablabla", "hello cruel world...!"].each do |str| + ["", "b", "bla", "lab", "o c", "d."].each do |needle| + regexp = Regexp.new(Regexp.escape(needle)) + str.byteindex(regexp).should == str.byteindex(needle) + + 0.upto(str.size + 1) do |start| + str.byteindex(regexp, start).should == str.byteindex(needle, start) + end + + (-str.size - 1).upto(-1) do |start| + str.byteindex(regexp, start).should == str.byteindex(needle, start) + end + end + end + end + + it "returns the byteindex of the first match of regexp" do + "blablabla".byteindex(/bla/).should == 0 + "blablabla".byteindex(/BLA/i).should == 0 + + "blablabla".byteindex(/.{0}/).should == 0 + "blablabla".byteindex(/.{6}/).should == 0 + "blablabla".byteindex(/.{9}/).should == 0 + + "blablabla".byteindex(/.*/).should == 0 + "blablabla".byteindex(/.+/).should == 0 + + "blablabla".byteindex(/lab|b/).should == 0 + + not_supported_on :opal do + "blablabla".byteindex(/\A/).should == 0 + "blablabla".byteindex(/\Z/).should == 9 + "blablabla".byteindex(/\z/).should == 9 + "blablabla\n".byteindex(/\Z/).should == 9 + "blablabla\n".byteindex(/\z/).should == 10 + end + + "blablabla".byteindex(/^/).should == 0 + "\nblablabla".byteindex(/^/).should == 0 + "b\nablabla".byteindex(/$/).should == 1 + "bl\nablabla".byteindex(/$/).should == 2 + + "blablabla".byteindex(/.l./).should == 0 + end + + it "starts the search at the given offset" do + "blablabla".byteindex(/.{0}/, 5).should == 5 + "blablabla".byteindex(/.{1}/, 5).should == 5 + "blablabla".byteindex(/.{2}/, 5).should == 5 + "blablabla".byteindex(/.{3}/, 5).should == 5 + "blablabla".byteindex(/.{4}/, 5).should == 5 + + "blablabla".byteindex(/.{0}/, 3).should == 3 + "blablabla".byteindex(/.{1}/, 3).should == 3 + "blablabla".byteindex(/.{2}/, 3).should == 3 + "blablabla".byteindex(/.{5}/, 3).should == 3 + "blablabla".byteindex(/.{6}/, 3).should == 3 + + "blablabla".byteindex(/.l./, 0).should == 0 + "blablabla".byteindex(/.l./, 1).should == 3 + "blablabla".byteindex(/.l./, 2).should == 3 + "blablabla".byteindex(/.l./, 3).should == 3 + + "xblaxbla".byteindex(/x./, 0).should == 0 + "xblaxbla".byteindex(/x./, 1).should == 4 + "xblaxbla".byteindex(/x./, 2).should == 4 + + not_supported_on :opal do + "blablabla\n".byteindex(/\Z/, 9).should == 9 + end + end + + it "starts the search at offset + self.length if offset is negative" do + str = "blablabla" + + ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle| + (-str.length .. -1).each do |offset| + str.byteindex(needle, offset).should == + str.byteindex(needle, offset + str.length) + end + end + end + + it "returns nil if the substring isn't found" do + "blablabla".byteindex(/BLA/).should == nil + + "blablabla".byteindex(/.{10}/).should == nil + "blaxbla".byteindex(/.x/, 3).should == nil + "blaxbla".byteindex(/..x/, 2).should == nil + end + + it "returns nil if the Regexp matches the empty string and the offset is out of range" do + "ruby".byteindex(//, 12).should be_nil + end + + it "supports \\G which matches at the given start offset" do + "helloYOU.".byteindex(/\GYOU/, 5).should == 5 + "helloYOU.".byteindex(/\GYOU/).should == nil + + re = /\G.+YOU/ + # The # marks where \G will match. + [ + ["#hi!YOUall.", 0], + ["h#i!YOUall.", 1], + ["hi#!YOUall.", 2], + ["hi!#YOUall.", nil] + ].each do |spec| + + start = spec[0].byteindex("#") + str = spec[0].delete("#") + + str.byteindex(re, start).should == spec[1] + end + end + + it "converts start_offset to an integer via to_int" do + obj = mock('1') + obj.should_receive(:to_int).and_return(1) + "RWOARW".byteindex(/R./, obj).should == 4 + end + + it "returns the character byteindex of a multibyte character" do + "ありがとう".byteindex(/が/).should == 6 + end + + it "returns the character byteindex after offset" do + "われわれ".byteindex(/わ/, 3).should == 6 + end + + it "treats the offset as a byteindex" do + "われわわれ".byteindex(/わ/, 6).should == 6 + end +end diff --git a/spec/ruby/core/string/byterindex_spec.rb b/spec/ruby/core/string/byterindex_spec.rb new file mode 100644 index 0000000000..983222e35d --- /dev/null +++ b/spec/ruby/core/string/byterindex_spec.rb @@ -0,0 +1,353 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/byte_index_common.rb' + +describe "String#byterindex with object" do + it "tries to convert obj to a string via to_str" do + obj = mock('lo') + def obj.to_str() "lo" end + "hello".byterindex(obj).should == "hello".byterindex("lo") + + obj = mock('o') + def obj.respond_to?(arg, *) true end + def obj.method_missing(*args) "o" end + "hello".byterindex(obj).should == "hello".byterindex("o") + end + + it "calls #to_int to convert the second argument" do + offset = mock("string index offset") + offset.should_receive(:to_int).and_return(3) + "abc".byterindex("c", offset).should == 2 + end + + it "does not raise IndexError when byte offset is correct or on string boundary" do + "わ".byterindex("", 0).should == 0 + "わ".byterindex("", 3).should == 3 + "わ".byterindex("").should == 3 + end + + it_behaves_like :byte_index_common, :byterindex +end + +describe "String#byterindex with String" do + it "behaves the same as String#byterindex(char) for one-character strings" do + "blablabla hello cruel world...!".split("").uniq.each do |str| + chr = str[0] + str.byterindex(str).should == str.byterindex(chr) + + 0.upto(str.size + 1) do |start| + str.byterindex(str, start).should == str.byterindex(chr, start) + end + + (-str.size - 1).upto(-1) do |start| + str.byterindex(str, start).should == str.byterindex(chr, start) + end + end + end + + it "behaves the same as String#byterindex(?char) for one-character strings" do + "blablabla hello cruel world...!".split("").uniq.each do |str| + chr = str[0] =~ / / ? str[0] : eval("?#{str[0]}") + str.byterindex(str).should == str.byterindex(chr) + + 0.upto(str.size + 1) do |start| + str.byterindex(str, start).should == str.byterindex(chr, start) + end + + (-str.size - 1).upto(-1) do |start| + str.byterindex(str, start).should == str.byterindex(chr, start) + end + end + end + + it "returns the index of the last occurrence of the given substring" do + "blablabla".byterindex("").should == 9 + "blablabla".byterindex("a").should == 8 + "blablabla".byterindex("la").should == 7 + "blablabla".byterindex("bla").should == 6 + "blablabla".byterindex("abla").should == 5 + "blablabla".byterindex("labla").should == 4 + "blablabla".byterindex("blabla").should == 3 + "blablabla".byterindex("ablabla").should == 2 + "blablabla".byterindex("lablabla").should == 1 + "blablabla".byterindex("blablabla").should == 0 + + "blablabla".byterindex("l").should == 7 + "blablabla".byterindex("bl").should == 6 + "blablabla".byterindex("abl").should == 5 + "blablabla".byterindex("labl").should == 4 + "blablabla".byterindex("blabl").should == 3 + "blablabla".byterindex("ablabl").should == 2 + "blablabla".byterindex("lablabl").should == 1 + "blablabla".byterindex("blablabl").should == 0 + + "blablabla".byterindex("b").should == 6 + "blablabla".byterindex("ab").should == 5 + "blablabla".byterindex("lab").should == 4 + "blablabla".byterindex("blab").should == 3 + "blablabla".byterindex("ablab").should == 2 + "blablabla".byterindex("lablab").should == 1 + "blablabla".byterindex("blablab").should == 0 + end + + it "ignores string subclasses" do + "blablabla".byterindex(StringSpecs::MyString.new("bla")).should == 6 + StringSpecs::MyString.new("blablabla").byterindex("bla").should == 6 + StringSpecs::MyString.new("blablabla").byterindex(StringSpecs::MyString.new("bla")).should == 6 + end + + it "starts the search at the given offset" do + "blablabla".byterindex("bl", 0).should == 0 + "blablabla".byterindex("bl", 1).should == 0 + "blablabla".byterindex("bl", 2).should == 0 + "blablabla".byterindex("bl", 3).should == 3 + + "blablabla".byterindex("bla", 0).should == 0 + "blablabla".byterindex("bla", 1).should == 0 + "blablabla".byterindex("bla", 2).should == 0 + "blablabla".byterindex("bla", 3).should == 3 + + "blablabla".byterindex("blab", 0).should == 0 + "blablabla".byterindex("blab", 1).should == 0 + "blablabla".byterindex("blab", 2).should == 0 + "blablabla".byterindex("blab", 3).should == 3 + "blablabla".byterindex("blab", 6).should == 3 + "blablablax".byterindex("blab", 6).should == 3 + + "blablabla".byterindex("la", 1).should == 1 + "blablabla".byterindex("la", 2).should == 1 + "blablabla".byterindex("la", 3).should == 1 + "blablabla".byterindex("la", 4).should == 4 + + "blablabla".byterindex("lab", 1).should == 1 + "blablabla".byterindex("lab", 2).should == 1 + "blablabla".byterindex("lab", 3).should == 1 + "blablabla".byterindex("lab", 4).should == 4 + + "blablabla".byterindex("ab", 2).should == 2 + "blablabla".byterindex("ab", 3).should == 2 + "blablabla".byterindex("ab", 4).should == 2 + "blablabla".byterindex("ab", 5).should == 5 + + "blablabla".byterindex("", 0).should == 0 + "blablabla".byterindex("", 1).should == 1 + "blablabla".byterindex("", 2).should == 2 + "blablabla".byterindex("", 7).should == 7 + "blablabla".byterindex("", 8).should == 8 + "blablabla".byterindex("", 9).should == 9 + "blablabla".byterindex("", 10).should == 9 + end + + it "starts the search at offset + self.length if offset is negative" do + str = "blablabla" + + ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle| + (-str.length .. -1).each do |offset| + str.byterindex(needle, offset).should == + str.byterindex(needle, offset + str.length) + end + end + end + + it "returns nil if the substring isn't found" do + "blablabla".byterindex("B").should == nil + "blablabla".byterindex("z").should == nil + "blablabla".byterindex("BLA").should == nil + "blablabla".byterindex("blablablabla").should == nil + + "hello".byterindex("lo", 0).should == nil + "hello".byterindex("lo", 1).should == nil + "hello".byterindex("lo", 2).should == nil + + "hello".byterindex("llo", 0).should == nil + "hello".byterindex("llo", 1).should == nil + + "hello".byterindex("el", 0).should == nil + "hello".byterindex("ello", 0).should == nil + + "hello".byterindex("", -6).should == nil + "hello".byterindex("", -7).should == nil + + "hello".byterindex("h", -6).should == nil + end + + it "tries to convert start_offset to an integer via to_int" do + obj = mock('5') + def obj.to_int() 5 end + "str".byterindex("st", obj).should == 0 + + obj = mock('5') + def obj.respond_to?(arg, *) true end + def obj.method_missing(*args) 5 end + "str".byterindex("st", obj).should == 0 + end + + it "raises a TypeError when given offset is nil" do + -> { "str".byterindex("st", nil) }.should raise_error(TypeError) + end + + it "handles a substring in a superset encoding" do + 'abc'.dup.force_encoding(Encoding::US_ASCII).byterindex('é').should == nil + end + + it "handles a substring in a subset encoding" do + 'été'.byterindex('t'.dup.force_encoding(Encoding::US_ASCII)).should == 2 + end +end + +describe "String#byterindex with Regexp" do + it "behaves the same as String#byterindex(string) for escaped string regexps" do + ["blablabla", "hello cruel world...!"].each do |str| + ["", "b", "bla", "lab", "o c", "d."].each do |needle| + regexp = Regexp.new(Regexp.escape(needle)) + str.byterindex(regexp).should == str.byterindex(needle) + + 0.upto(str.size + 1) do |start| + str.byterindex(regexp, start).should == str.byterindex(needle, start) + end + + (-str.size - 1).upto(-1) do |start| + str.byterindex(regexp, start).should == str.byterindex(needle, start) + end + end + end + end + + it "returns the index of the first match from the end of string of regexp" do + "blablabla".byterindex(/bla/).should == 6 + "blablabla".byterindex(/BLA/i).should == 6 + + "blablabla".byterindex(/.{0}/).should == 9 + "blablabla".byterindex(/.{1}/).should == 8 + "blablabla".byterindex(/.{2}/).should == 7 + "blablabla".byterindex(/.{6}/).should == 3 + "blablabla".byterindex(/.{9}/).should == 0 + + "blablabla".byterindex(/.*/).should == 9 + "blablabla".byterindex(/.+/).should == 8 + + "blablabla".byterindex(/bla|a/).should == 8 + + not_supported_on :opal do + "blablabla".byterindex(/\A/).should == 0 + "blablabla".byterindex(/\Z/).should == 9 + "blablabla".byterindex(/\z/).should == 9 + "blablabla\n".byterindex(/\Z/).should == 10 + "blablabla\n".byterindex(/\z/).should == 10 + end + + "blablabla".byterindex(/^/).should == 0 + not_supported_on :opal do + "\nblablabla".byterindex(/^/).should == 1 + "b\nlablabla".byterindex(/^/).should == 2 + end + "blablabla".byterindex(/$/).should == 9 + + "blablabla".byterindex(/.l./).should == 6 + end + + it "starts the search at the given offset" do + "blablabla".byterindex(/.{0}/, 5).should == 5 + "blablabla".byterindex(/.{1}/, 5).should == 5 + "blablabla".byterindex(/.{2}/, 5).should == 5 + "blablabla".byterindex(/.{3}/, 5).should == 5 + "blablabla".byterindex(/.{4}/, 5).should == 5 + + "blablabla".byterindex(/.{0}/, 3).should == 3 + "blablabla".byterindex(/.{1}/, 3).should == 3 + "blablabla".byterindex(/.{2}/, 3).should == 3 + "blablabla".byterindex(/.{5}/, 3).should == 3 + "blablabla".byterindex(/.{6}/, 3).should == 3 + + "blablabla".byterindex(/.l./, 0).should == 0 + "blablabla".byterindex(/.l./, 1).should == 0 + "blablabla".byterindex(/.l./, 2).should == 0 + "blablabla".byterindex(/.l./, 3).should == 3 + + "blablablax".byterindex(/.x/, 10).should == 8 + "blablablax".byterindex(/.x/, 9).should == 8 + "blablablax".byterindex(/.x/, 8).should == 8 + + "blablablax".byterindex(/..x/, 10).should == 7 + "blablablax".byterindex(/..x/, 9).should == 7 + "blablablax".byterindex(/..x/, 8).should == 7 + "blablablax".byterindex(/..x/, 7).should == 7 + + not_supported_on :opal do + "blablabla\n".byterindex(/\Z/, 9).should == 9 + end + end + + it "starts the search at offset + self.length if offset is negative" do + str = "blablabla" + + ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle| + (-str.length .. -1).each do |offset| + str.byterindex(needle, offset).should == + str.byterindex(needle, offset + str.length) + end + end + end + + it "returns nil if the substring isn't found" do + "blablabla".byterindex(/BLA/).should == nil + "blablabla".byterindex(/.{10}/).should == nil + "blablablax".byterindex(/.x/, 7).should == nil + "blablablax".byterindex(/..x/, 6).should == nil + + not_supported_on :opal do + "blablabla".byterindex(/\Z/, 5).should == nil + "blablabla".byterindex(/\z/, 5).should == nil + "blablabla\n".byterindex(/\z/, 9).should == nil + end + end + + not_supported_on :opal do + it "supports \\G which matches at the given start offset" do + "helloYOU.".byterindex(/YOU\G/, 8).should == 5 + "helloYOU.".byterindex(/YOU\G/).should == nil + + idx = "helloYOUall!".index("YOU") + re = /YOU.+\G.+/ + # The # marks where \G will match. + [ + ["helloYOU#all.", nil], + ["helloYOUa#ll.", idx], + ["helloYOUal#l.", idx], + ["helloYOUall#.", idx], + ["helloYOUall.#", nil] + ].each do |i| + start = i[0].index("#") + str = i[0].delete("#") + + str.byterindex(re, start).should == i[1] + end + end + end + + it "tries to convert start_offset to an integer" do + obj = mock('5') + def obj.to_int() 5 end + "str".byterindex(/../, obj).should == 1 + + obj = mock('5') + def obj.respond_to?(arg, *) true end + def obj.method_missing(*args); 5; end + "str".byterindex(/../, obj).should == 1 + end + + it "raises a TypeError when given offset is nil" do + -> { "str".byterindex(/../, nil) }.should raise_error(TypeError) + end + + it "returns the reverse byte index of a multibyte character" do + "ありがりがとう".byterindex("が").should == 12 + "ありがりがとう".byterindex(/が/).should == 12 + end + + it "returns the character index before the finish" do + "ありがりがとう".byterindex("が", 9).should == 6 + "ありがりがとう".byterindex(/が/, 9).should == 6 + end +end diff --git a/spec/ruby/core/string/bytes_spec.rb b/spec/ruby/core/string/bytes_spec.rb index 859b346550..02151eebbc 100644 --- a/spec/ruby/core/string/bytes_spec.rb +++ b/spec/ruby/core/string/bytes_spec.rb @@ -50,6 +50,6 @@ describe "String#bytes" do end it "is unaffected by #force_encoding" do - @utf8.force_encoding('ASCII').bytes.to_a.should == @utf8.bytes.to_a + @utf8.dup.force_encoding('ASCII').bytes.to_a.should == @utf8.bytes.to_a end end diff --git a/spec/ruby/core/string/bytesize_spec.rb b/spec/ruby/core/string/bytesize_spec.rb index a31f3ae671..2bbefc0820 100644 --- a/spec/ruby/core/string/bytesize_spec.rb +++ b/spec/ruby/core/string/bytesize_spec.rb @@ -13,21 +13,21 @@ describe "String#bytesize" do end it "works with pseudo-ASCII strings containing single UTF-8 characters" do - "\u{6666}".force_encoding('ASCII').bytesize.should == 3 + "\u{6666}".dup.force_encoding('ASCII').bytesize.should == 3 end it "works with strings containing UTF-8 characters" do - "c \u{6666}".force_encoding('UTF-8').bytesize.should == 5 + "c \u{6666}".dup.force_encoding('UTF-8').bytesize.should == 5 "c \u{6666}".bytesize.should == 5 end it "works with pseudo-ASCII strings containing UTF-8 characters" do - "c \u{6666}".force_encoding('ASCII').bytesize.should == 5 + "c \u{6666}".dup.force_encoding('ASCII').bytesize.should == 5 end it "returns 0 for the empty string" do "".bytesize.should == 0 - "".force_encoding('ASCII').bytesize.should == 0 - "".force_encoding('UTF-8').bytesize.should == 0 + "".dup.force_encoding('ASCII').bytesize.should == 0 + "".dup.force_encoding('UTF-8').bytesize.should == 0 end end diff --git a/spec/ruby/core/string/byteslice_spec.rb b/spec/ruby/core/string/byteslice_spec.rb index a49da040eb..4ad9e8d8f1 100644 --- a/spec/ruby/core/string/byteslice_spec.rb +++ b/spec/ruby/core/string/byteslice_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../spec_helper' require_relative 'fixtures/classes' require_relative 'shared/slice' @@ -17,11 +17,17 @@ describe "String#byteslice with Range" do it_behaves_like :string_slice_range, :byteslice end -describe "String#byteslice on on non ASCII strings" do +describe "String#byteslice on non ASCII strings" do it "returns byteslice of unicode strings" do - "\u3042".byteslice(1).should == "\x81".force_encoding("UTF-8") - "\u3042".byteslice(1, 2).should == "\x81\x82".force_encoding("UTF-8") - "\u3042".byteslice(1..2).should == "\x81\x82".force_encoding("UTF-8") - "\u3042".byteslice(-1).should == "\x82".force_encoding("UTF-8") + "\u3042".byteslice(1).should == "\x81".dup.force_encoding("UTF-8") + "\u3042".byteslice(1, 2).should == "\x81\x82".dup.force_encoding("UTF-8") + "\u3042".byteslice(1..2).should == "\x81\x82".dup.force_encoding("UTF-8") + "\u3042".byteslice(-1).should == "\x82".dup.force_encoding("UTF-8") + end + + it "returns a String in the same encoding as self" do + "ruby".encode("UTF-8").slice(0).encoding.should == Encoding::UTF_8 + "ruby".encode("US-ASCII").slice(0).encoding.should == Encoding::US_ASCII + "ruby".encode("Windows-1251").slice(0).encoding.should == Encoding::Windows_1251 end end diff --git a/spec/ruby/core/string/bytesplice_spec.rb b/spec/ruby/core/string/bytesplice_spec.rb new file mode 100644 index 0000000000..2c770e340a --- /dev/null +++ b/spec/ruby/core/string/bytesplice_spec.rb @@ -0,0 +1,294 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' + +describe "String#bytesplice" do + it "raises IndexError when index is less than -bytesize" do + -> { "hello".bytesplice(-6, 0, "xxx") }.should raise_error(IndexError, "index -6 out of string") + end + + it "raises IndexError when index is greater than bytesize" do + -> { "hello".bytesplice(6, 0, "xxx") }.should raise_error(IndexError, "index 6 out of string") + end + + it "raises IndexError for negative length" do + -> { "abc".bytesplice(0, -2, "") }.should raise_error(IndexError, "negative length -2") + end + + it "replaces with integer indices" do + "hello".bytesplice(-5, 0, "xxx").should == "xxxhello" + "hello".bytesplice(0, 0, "xxx").should == "xxxhello" + "hello".bytesplice(0, 1, "xxx").should == "xxxello" + "hello".bytesplice(0, 5, "xxx").should == "xxx" + "hello".bytesplice(0, 6, "xxx").should == "xxx" + end + + it "raises RangeError when range left boundary is less than -bytesize" do + -> { "hello".bytesplice(-6...-6, "xxx") }.should raise_error(RangeError, "-6...-6 out of range") + end + + it "replaces with ranges" do + "hello".bytesplice(-5...-5, "xxx").should == "xxxhello" + "hello".bytesplice(0...0, "xxx").should == "xxxhello" + "hello".bytesplice(0..0, "xxx").should == "xxxello" + "hello".bytesplice(0...1, "xxx").should == "xxxello" + "hello".bytesplice(0..1, "xxx").should == "xxxllo" + "hello".bytesplice(0..-1, "xxx").should == "xxx" + "hello".bytesplice(0...5, "xxx").should == "xxx" + "hello".bytesplice(0...6, "xxx").should == "xxx" + end + + it "raises TypeError when integer index is provided without length argument" do + -> { "hello".bytesplice(0, "xxx") }.should raise_error(TypeError, "wrong argument type Integer (expected Range)") + end + + it "replaces on an empty string" do + "".bytesplice(0, 0, "").should == "" + "".bytesplice(0, 0, "xxx").should == "xxx" + end + + it "mutates self" do + s = "hello" + s.bytesplice(2, 1, "xxx").should.equal?(s) + end + + it "raises when string is frozen" do + s = "hello".freeze + -> { s.bytesplice(2, 1, "xxx") }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") + end + + ruby_version_is "3.3" do + it "raises IndexError when str_index is less than -bytesize" do + -> { "hello".bytesplice(2, 1, "HELLO", -6, 0) }.should raise_error(IndexError, "index -6 out of string") + end + + it "raises IndexError when str_index is greater than bytesize" do + -> { "hello".bytesplice(2, 1, "HELLO", 6, 0) }.should raise_error(IndexError, "index 6 out of string") + end + + it "raises IndexError for negative str length" do + -> { "abc".bytesplice(0, 1, "", 0, -2) }.should raise_error(IndexError, "negative length -2") + end + + it "replaces with integer str indices" do + "hello".bytesplice(1, 2, "HELLO", -5, 0).should == "hlo" + "hello".bytesplice(1, 2, "HELLO", 0, 0).should == "hlo" + "hello".bytesplice(1, 2, "HELLO", 0, 1).should == "hHlo" + "hello".bytesplice(1, 2, "HELLO", 0, 5).should == "hHELLOlo" + "hello".bytesplice(1, 2, "HELLO", 0, 6).should == "hHELLOlo" + end + + it "raises RangeError when str range left boundary is less than -bytesize" do + -> { "hello".bytesplice(0..1, "HELLO", -6...-6) }.should raise_error(RangeError, "-6...-6 out of range") + end + + it "replaces with str ranges" do + "hello".bytesplice(1..2, "HELLO", -5...-5).should == "hlo" + "hello".bytesplice(1..2, "HELLO", 0...0).should == "hlo" + "hello".bytesplice(1..2, "HELLO", 0..0).should == "hHlo" + "hello".bytesplice(1..2, "HELLO", 0...1).should == "hHlo" + "hello".bytesplice(1..2, "HELLO", 0..1).should == "hHElo" + "hello".bytesplice(1..2, "HELLO", 0..-1).should == "hHELLOlo" + "hello".bytesplice(1..2, "HELLO", 0...5).should == "hHELLOlo" + "hello".bytesplice(1..2, "HELLO", 0...6).should == "hHELLOlo" + end + + it "raises ArgumentError when integer str index is provided without str length argument" do + -> { "hello".bytesplice(0, 1, "xxx", 0) }.should raise_error(ArgumentError, "wrong number of arguments (given 4, expected 2, 3, or 5)") + end + + it "replaces on an empty string with str index/length" do + "".bytesplice(0, 0, "", 0, 0).should == "" + "".bytesplice(0, 0, "xxx", 0, 1).should == "x" + end + + it "mutates self with substring and str index/length" do + s = "hello" + s.bytesplice(2, 1, "xxx", 1, 2).should.equal?(s) + s.should.eql?("hexxlo") + end + + it "raises when string is frozen and str index/length" do + s = "hello".freeze + -> { s.bytesplice(2, 1, "xxx", 0, 1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") + end + + it "replaces on an empty string with str range" do + "".bytesplice(0..0, "", 0..0).should == "" + "".bytesplice(0..0, "xyz", 0..1).should == "xy" + end + + it "mutates self with substring and str range" do + s = "hello" + s.bytesplice(2..2, "xyz", 1..2).should.equal?(s) + s.should.eql?("heyzlo") + end + + it "raises when string is frozen and str range" do + s = "hello".freeze + -> { s.bytesplice(2..2, "yzx", 0..1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") + end + end +end + +describe "String#bytesplice with multibyte characters" do + it "raises IndexError when index is out of byte size boundary" do + -> { "こんにちは".bytesplice(-16, 0, "xxx") }.should raise_error(IndexError, "index -16 out of string") + end + + it "raises IndexError when index is not on a codepoint boundary" do + -> { "こんにちは".bytesplice(1, 0, "xxx") }.should raise_error(IndexError, "offset 1 does not land on character boundary") + end + + it "raises IndexError when length is not matching the codepoint boundary" do + -> { "こんにちは".bytesplice(0, 1, "xxx") }.should raise_error(IndexError, "offset 1 does not land on character boundary") + -> { "こんにちは".bytesplice(0, 2, "xxx") }.should raise_error(IndexError, "offset 2 does not land on character boundary") + end + + it "replaces with integer indices" do + "こんにちは".bytesplice(-15, 0, "xxx").should == "xxxこんにちは" + "こんにちは".bytesplice(0, 0, "xxx").should == "xxxこんにちは" + "こんにちは".bytesplice(0, 3, "xxx").should == "xxxんにちは" + "こんにちは".bytesplice(3, 3, "はは").should == "こははにちは" + "こんにちは".bytesplice(15, 0, "xxx").should == "こんにちはxxx" + end + + it "replaces with range" do + "こんにちは".bytesplice(-15...-16, "xxx").should == "xxxこんにちは" + "こんにちは".bytesplice(0...0, "xxx").should == "xxxこんにちは" + "こんにちは".bytesplice(0..2, "xxx").should == "xxxんにちは" + "こんにちは".bytesplice(0...3, "xxx").should == "xxxんにちは" + "こんにちは".bytesplice(0..5, "xxx").should == "xxxにちは" + "こんにちは".bytesplice(0..-1, "xxx").should == "xxx" + "こんにちは".bytesplice(0...15, "xxx").should == "xxx" + "こんにちは".bytesplice(0...18, "xxx").should == "xxx" + end + + it "treats negative length for range as 0" do + "こんにちは".bytesplice(0...-100, "xxx").should == "xxxこんにちは" + "こんにちは".bytesplice(3...-100, "xxx").should == "こxxxんにちは" + "こんにちは".bytesplice(-15...-100, "xxx").should == "xxxこんにちは" + end + + it "raises when ranges not match codepoint boundaries" do + -> { "こんにちは".bytesplice(0..0, "x") }.should raise_error(IndexError, "offset 1 does not land on character boundary") + -> { "こんにちは".bytesplice(0..1, "x") }.should raise_error(IndexError, "offset 2 does not land on character boundary") + # Begin is incorrect + -> { "こんにちは".bytesplice(-4..-1, "x") }.should raise_error(IndexError, "offset 11 does not land on character boundary") + -> { "こんにちは".bytesplice(-5..-1, "x") }.should raise_error(IndexError, "offset 10 does not land on character boundary") + # End is incorrect + -> { "こんにちは".bytesplice(-3..-2, "x") }.should raise_error(IndexError, "offset 14 does not land on character boundary") + -> { "こんにちは".bytesplice(-3..-3, "x") }.should raise_error(IndexError, "offset 13 does not land on character boundary") + end + + it "deals with a different encoded argument" do + s = "こんにちは" + s.encoding.should == Encoding::UTF_8 + sub = "xxxxxx" + sub.force_encoding(Encoding::US_ASCII) + + result = s.bytesplice(0, 3, sub) + result.should == "xxxxxxんにちは" + result.encoding.should == Encoding::UTF_8 + + s = "xxxxxx" + s.force_encoding(Encoding::US_ASCII) + sub = "こんにちは" + sub.encoding.should == Encoding::UTF_8 + + result = s.bytesplice(0, 3, sub) + result.should == "こんにちはxxx" + result.encoding.should == Encoding::UTF_8 + end + + ruby_version_is "3.3" do + it "raises IndexError when str_index is out of byte size boundary" do + -> { "こんにちは".bytesplice(3, 3, "こんにちは", -16, 0) }.should raise_error(IndexError, "index -16 out of string") + end + + it "raises IndexError when str_index is not on a codepoint boundary" do + -> { "こんにちは".bytesplice(3, 3, "こんにちは", 1, 0) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + end + + it "raises IndexError when str_length is not matching the codepoint boundary" do + -> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 1) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + -> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 2) }.should raise_error(IndexError, "offset 2 does not land on character boundary") + end + + it "replaces with integer str indices" do + "こんにちは".bytesplice(3, 3, "こんにちは", -15, 0).should == "こにちは" + "こんにちは".bytesplice(3, 3, "こんにちは", 0, 0).should == "こにちは" + "こんにちは".bytesplice(3, 3, "こんにちは", 0, 3).should == "ここにちは" + "こんにちは".bytesplice(3, 3, "はは", 3, 3).should == "こはにちは" + "こんにちは".bytesplice(3, 3, "こんにちは", 15, 0).should == "こにちは" + end + + it "replaces with str range" do + "こんにちは".bytesplice(0..2, "こんにちは", -15...-16).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0...0).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3..5).should == "んんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3...6).should == "んんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3..8).should == "んにんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0..-1).should == "こんにちはんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0...15).should == "こんにちはんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0...18).should == "こんにちはんにちは" + end + + it "treats negative length for str range as 0" do + "こんにちは".bytesplice(0..2, "こんにちは", 0...-100).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3...-100).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", -15...-100).should == "んにちは" + end + + it "raises when ranges not match codepoint boundaries in str" do + -> { "こんにちは".bytesplice(3...3, "こ", 0..0) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + -> { "こんにちは".bytesplice(3...3, "こ", 0..1) }.should raise_error(IndexError, "offset 2 does not land on character boundary") + # Begin is incorrect + -> { "こんにちは".bytesplice(3...3, "こんにちは", -4..-1) }.should raise_error(IndexError, "offset 11 does not land on character boundary") + -> { "こんにちは".bytesplice(3...3, "こんにちは", -5..-1) }.should raise_error(IndexError, "offset 10 does not land on character boundary") + # End is incorrect + -> { "こんにちは".bytesplice(3...3, "こんにちは", -3..-2) }.should raise_error(IndexError, "offset 14 does not land on character boundary") + -> { "こんにちは".bytesplice(3...3, "こんにちは", -3..-3) }.should raise_error(IndexError, "offset 13 does not land on character boundary") + end + + it "deals with a different encoded argument with str index/length" do + s = "こんにちは" + s.encoding.should == Encoding::UTF_8 + sub = "goodbye" + sub.force_encoding(Encoding::US_ASCII) + + result = s.bytesplice(3, 3, sub, 0, 3) + result.should == "こgooにちは" + result.encoding.should == Encoding::UTF_8 + + s = "hello" + s.force_encoding(Encoding::US_ASCII) + sub = "こんにちは" + sub.encoding.should == Encoding::UTF_8 + + result = s.bytesplice(1, 2, sub, 3, 3) + result.should == "hんlo" + result.encoding.should == Encoding::UTF_8 + end + + it "deals with a different encoded argument with str range" do + s = "こんにちは" + s.encoding.should == Encoding::UTF_8 + sub = "goodbye" + sub.force_encoding(Encoding::US_ASCII) + + result = s.bytesplice(3..5, sub, 0..2) + result.should == "こgooにちは" + result.encoding.should == Encoding::UTF_8 + + s = "hello" + s.force_encoding(Encoding::US_ASCII) + sub = "こんにちは" + sub.encoding.should == Encoding::UTF_8 + + result = s.bytesplice(1..2, sub, 3..5) + result.should == "hんlo" + result.encoding.should == Encoding::UTF_8 + end + end +end diff --git a/spec/ruby/core/string/capitalize_spec.rb b/spec/ruby/core/string/capitalize_spec.rb index 751f4160a6..5e59b656c5 100644 --- a/spec/ruby/core/string/capitalize_spec.rb +++ b/spec/ruby/core/string/capitalize_spec.rb @@ -78,24 +78,19 @@ describe "String#capitalize" do -> { "abc".capitalize(:invalid_option) }.should raise_error(ArgumentError) end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - StringSpecs::MyString.new("hello").capitalize.should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("Hello").capitalize.should be_an_instance_of(StringSpecs::MyString) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("hello").capitalize.should be_an_instance_of(String) + StringSpecs::MyString.new("Hello").capitalize.should be_an_instance_of(String) end - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new("hello").capitalize.should be_an_instance_of(String) - StringSpecs::MyString.new("Hello").capitalize.should be_an_instance_of(String) - end + it "returns a String in the same encoding as self" do + "h".encode("US-ASCII").capitalize.encoding.should == Encoding::US_ASCII end end describe "String#capitalize!" do it "capitalizes self in place" do - a = "hello" + a = +"hello" a.capitalize!.should equal(a) a.should == "Hello" end @@ -108,13 +103,13 @@ describe "String#capitalize!" do describe "full Unicode case mapping" do it "modifies self in place for all of Unicode with no option" do - a = "äöÜ" + a = +"äöÜ" a.capitalize! a.should == "Äöü" end it "only capitalizes the first resulting character when upcasing a character produces a multi-character sequence" do - a = "ß" + a = +"ß" a.capitalize! a.should == "Ss" end @@ -126,7 +121,7 @@ describe "String#capitalize!" do end it "updates string metadata" do - capitalized = "ßeT" + capitalized = +"ßeT" capitalized.capitalize! capitalized.should == "Sset" @@ -138,7 +133,7 @@ describe "String#capitalize!" do describe "modifies self in place for ASCII-only case mapping" do it "does not capitalize non-ASCII characters" do - a = "ßet" + a = +"ßet" a.capitalize!(:ascii) a.should == "ßet" end @@ -152,13 +147,13 @@ describe "String#capitalize!" do describe "modifies self in place for full Unicode case mapping adapted for Turkic languages" do it "capitalizes ASCII characters according to Turkic semantics" do - a = "iSa" + a = +"iSa" a.capitalize!(:turkic) a.should == "İsa" end it "allows Lithuanian as an extra option" do - a = "iSa" + a = +"iSa" a.capitalize!(:turkic, :lithuanian) a.should == "İsa" end @@ -170,13 +165,13 @@ describe "String#capitalize!" do describe "modifies self in place for full Unicode case mapping adapted for Lithuanian" do it "currently works the same as full Unicode case mapping" do - a = "iß" + a = +"iß" a.capitalize!(:lithuanian) a.should == "Iß" end it "allows Turkic as an extra option (and applies Turkic semantics)" do - a = "iß" + a = +"iß" a.capitalize!(:lithuanian, :turkic) a.should == "İß" end @@ -195,12 +190,12 @@ describe "String#capitalize!" do end it "returns nil when no changes are made" do - a = "Hello" + a = +"Hello" a.capitalize!.should == nil a.should == "Hello" - "".capitalize!.should == nil - "H".capitalize!.should == nil + (+"").capitalize!.should == nil + (+"H").capitalize!.should == nil end it "raises a FrozenError when self is frozen" do diff --git a/spec/ruby/core/string/casecmp_spec.rb b/spec/ruby/core/string/casecmp_spec.rb index 986fbc8718..81ebea557c 100644 --- a/spec/ruby/core/string/casecmp_spec.rb +++ b/spec/ruby/core/string/casecmp_spec.rb @@ -117,6 +117,11 @@ describe "String#casecmp independent of case" do "B".casecmp(a).should == 1 end end + + it "returns 0 for empty strings in different encodings" do + ''.b.casecmp('').should == 0 + ''.b.casecmp(''.encode("UTF-32LE")).should == 0 + end end describe 'String#casecmp? independent of case' do @@ -191,4 +196,9 @@ describe 'String#casecmp? independent of case' do it "returns nil if other can't be converted to a string" do "abc".casecmp?(mock('abc')).should be_nil end + + it "returns true for empty strings in different encodings" do + ''.b.should.casecmp?('') + ''.b.should.casecmp?(''.encode("UTF-32LE")) + end end diff --git a/spec/ruby/core/string/center_spec.rb b/spec/ruby/core/string/center_spec.rb index 76da6e1e09..1667b59327 100644 --- a/spec/ruby/core/string/center_spec.rb +++ b/spec/ruby/core/string/center_spec.rb @@ -81,31 +81,18 @@ describe "String#center with length, padding" do -> { "hello".center(0, "") }.should raise_error(ArgumentError) end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on subclasses" do - StringSpecs::MyString.new("").center(10).should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("foo").center(10).should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("foo").center(10, StringSpecs::MyString.new("x")).should be_an_instance_of(StringSpecs::MyString) - - "".center(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - "foo".center(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - end - end + it "returns String instances when called on subclasses" do + StringSpecs::MyString.new("").center(10).should be_an_instance_of(String) + StringSpecs::MyString.new("foo").center(10).should be_an_instance_of(String) + StringSpecs::MyString.new("foo").center(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - ruby_version_is '3.0' do - it "returns String instances when called on subclasses" do - StringSpecs::MyString.new("").center(10).should be_an_instance_of(String) - StringSpecs::MyString.new("foo").center(10).should be_an_instance_of(String) - StringSpecs::MyString.new("foo").center(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - - "".center(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - "foo".center(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - end + "".center(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) + "foo".center(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) end describe "with width" do it "returns a String in the same encoding as the original" do - str = "abc".force_encoding Encoding::IBM437 + str = "abc".dup.force_encoding Encoding::IBM437 result = str.center 6 result.should == " abc " result.encoding.should equal(Encoding::IBM437) @@ -114,7 +101,7 @@ describe "String#center with length, padding" do describe "with width, pattern" do it "returns a String in the compatible encoding" do - str = "abc".force_encoding Encoding::IBM437 + str = "abc".dup.force_encoding Encoding::IBM437 result = str.center 6, "あ" result.should == "あabcああ" result.encoding.should equal(Encoding::UTF_8) diff --git a/spec/ruby/core/string/chars_spec.rb b/spec/ruby/core/string/chars_spec.rb index e4f26bc0cc..ee85430574 100644 --- a/spec/ruby/core/string/chars_spec.rb +++ b/spec/ruby/core/string/chars_spec.rb @@ -1,5 +1,5 @@ +require_relative "../../spec_helper" require_relative 'shared/chars' -require_relative 'shared/each_char_without_block' describe "String#chars" do it_behaves_like :string_chars, :chars @@ -7,4 +7,10 @@ describe "String#chars" do it "returns an array when no block given" do "hello".chars.should == ['h', 'e', 'l', 'l', 'o'] end + + it "returns Strings in the same encoding as self" do + "hello".encode("US-ASCII").chars.each do |c| + c.encoding.should == Encoding::US_ASCII + end + end end diff --git a/spec/ruby/core/string/chilled_string_spec.rb b/spec/ruby/core/string/chilled_string_spec.rb new file mode 100644 index 0000000000..73d055cbdf --- /dev/null +++ b/spec/ruby/core/string/chilled_string_spec.rb @@ -0,0 +1,151 @@ +require_relative '../../spec_helper' + +describe "chilled String" do + guard -> { ruby_version_is "3.4" and !"test".equal?("test") } do + describe "chilled string literals" do + + describe "#frozen?" do + it "returns false" do + "chilled".frozen?.should == false + end + end + + describe "#-@" do + it "returns a different instance" do + input = "chilled" + interned = (-input) + interned.frozen?.should == true + interned.object_id.should_not == input.object_id + end + end + + describe "#+@" do + it "returns a different instance" do + input = "chilled" + duped = (+input) + duped.frozen?.should == false + duped.object_id.should_not == input.object_id + end + end + + describe "#clone" do + it "preserves chilled status" do + input = "chilled".clone + -> { + input << "-mutated" + }.should complain(/literal string will be frozen in the future/) + input.should == "chilled-mutated" + end + end + + describe "mutation" do + it "emits a warning" do + input = "chilled" + -> { + input << "-mutated" + }.should complain(/literal string will be frozen in the future/) + input.should == "chilled-mutated" + end + + it "emits a warning for concatenated strings" do + input = "still" "+chilled" + -> { + input << "-mutated" + }.should complain(/literal string will be frozen in the future/) + input.should == "still+chilled-mutated" + end + + it "emits a warning on singleton_class creation" do + -> { + "chilled".singleton_class + }.should complain(/literal string will be frozen in the future/) + end + + it "emits a warning on instance variable assignment" do + -> { + "chilled".instance_variable_set(:@ivar, 42) + }.should complain(/literal string will be frozen in the future/) + end + + it "raises FrozenError after the string was explicitly frozen" do + input = "chilled" + input.freeze + -> { + -> { + input << "mutated" + }.should raise_error(FrozenError) + }.should_not complain(/literal string will be frozen in the future/) + end + end + end + + describe "chilled strings returned by Symbol#to_s" do + + describe "#frozen?" do + it "returns false" do + :chilled.to_s.frozen?.should == false + end + end + + describe "#-@" do + it "returns a different instance" do + input = :chilled.to_s + interned = (-input) + interned.frozen?.should == true + interned.object_id.should_not == input.object_id + end + end + + describe "#+@" do + it "returns a different instance" do + input = :chilled.to_s + duped = (+input) + duped.frozen?.should == false + duped.object_id.should_not == input.object_id + end + end + + describe "#clone" do + it "preserves chilled status" do + input = :chilled.to_s.clone + -> { + input << "-mutated" + }.should complain(/string returned by :chilled\.to_s will be frozen in the future/) + input.should == "chilled-mutated" + end + end + + describe "mutation" do + it "emits a warning" do + input = :chilled.to_s + -> { + input << "-mutated" + }.should complain(/string returned by :chilled\.to_s will be frozen in the future/) + input.should == "chilled-mutated" + end + + it "emits a warning on singleton_class creation" do + -> { + :chilled.to_s.singleton_class + }.should complain(/string returned by :chilled\.to_s will be frozen in the future/) + end + + it "emits a warning on instance variable assignment" do + -> { + :chilled.to_s.instance_variable_set(:@ivar, 42) + }.should complain(/string returned by :chilled\.to_s will be frozen in the future/) + end + + it "raises FrozenError after the string was explicitly frozen" do + input = :chilled.to_s + input.freeze + -> { + -> { + input << "mutated" + }.should raise_error(FrozenError) + }.should_not complain(/string returned by :chilled\.to_s will be frozen in the future/) + end + end + end + end +end diff --git a/spec/ruby/core/string/chomp_spec.rb b/spec/ruby/core/string/chomp_spec.rb index c03bfc7951..d27c84c6f6 100644 --- a/spec/ruby/core/string/chomp_spec.rb +++ b/spec/ruby/core/string/chomp_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -40,18 +41,13 @@ describe "String#chomp" do "".chomp.should == "" end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - str = StringSpecs::MyString.new("hello\n").chomp - str.should be_an_instance_of(StringSpecs::MyString) - end + it "returns a String in the same encoding as self" do + "abc\n\n".encode("US-ASCII").chomp.encoding.should == Encoding::US_ASCII end - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - str = StringSpecs::MyString.new("hello\n").chomp - str.should be_an_instance_of(String) - end + it "returns String instances when called on a subclass" do + str = StringSpecs::MyString.new("hello\n").chomp + str.should be_an_instance_of(String) end it "removes trailing characters that match $/ when it has been assigned a value" do diff --git a/spec/ruby/core/string/chop_spec.rb b/spec/ruby/core/string/chop_spec.rb index 266d973f67..99c2c82190 100644 --- a/spec/ruby/core/string/chop_spec.rb +++ b/spec/ruby/core/string/chop_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -49,16 +50,12 @@ describe "String#chop" do s.chop.should_not equal(s) end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - StringSpecs::MyString.new("hello\n").chop.should be_an_instance_of(StringSpecs::MyString) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("hello\n").chop.should be_an_instance_of(String) end - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new("hello\n").chop.should be_an_instance_of(String) - end + it "returns a String in the same encoding as self" do + "abc\n\n".encode("US-ASCII").chop.encoding.should == Encoding::US_ASCII end end diff --git a/spec/ruby/core/string/clear_spec.rb b/spec/ruby/core/string/clear_spec.rb index e1d68e03bd..152986fd0f 100644 --- a/spec/ruby/core/string/clear_spec.rb +++ b/spec/ruby/core/string/clear_spec.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: false require_relative '../../spec_helper' describe "String#clear" do diff --git a/spec/ruby/core/string/clone_spec.rb b/spec/ruby/core/string/clone_spec.rb index f8d40423f0..a2ba2f9877 100644 --- a/spec/ruby/core/string/clone_spec.rb +++ b/spec/ruby/core/string/clone_spec.rb @@ -54,4 +54,8 @@ describe "String#clone" do orig.should == "xtring" clone.should == "string" end + + it "returns a String in the same encoding as self" do + "a".encode("US-ASCII").clone.encoding.should == Encoding::US_ASCII + end end diff --git a/spec/ruby/core/string/codepoints_spec.rb b/spec/ruby/core/string/codepoints_spec.rb index 0b6cde82f7..12a5bf5892 100644 --- a/spec/ruby/core/string/codepoints_spec.rb +++ b/spec/ruby/core/string/codepoints_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../spec_helper' require_relative 'shared/codepoints' require_relative 'shared/each_codepoint_without_block' @@ -11,7 +11,7 @@ describe "String#codepoints" do end it "raises an ArgumentError when no block is given if self has an invalid encoding" do - s = "\xDF".force_encoding(Encoding::UTF_8) + s = "\xDF".dup.force_encoding(Encoding::UTF_8) s.valid_encoding?.should be_false -> { s.codepoints }.should raise_error(ArgumentError) end diff --git a/spec/ruby/core/string/comparison_spec.rb b/spec/ruby/core/string/comparison_spec.rb index 91cfdca25a..9db0cff5ee 100644 --- a/spec/ruby/core/string/comparison_spec.rb +++ b/spec/ruby/core/string/comparison_spec.rb @@ -61,12 +61,12 @@ describe "String#<=> with String" do end it "ignores encoding difference" do - ("ÄÖÛ".force_encoding("utf-8") <=> "ÄÖÜ".force_encoding("iso-8859-1")).should == -1 - ("ÄÖÜ".force_encoding("utf-8") <=> "ÄÖÛ".force_encoding("iso-8859-1")).should == 1 + ("ÄÖÛ".dup.force_encoding("utf-8") <=> "ÄÖÜ".dup.force_encoding("iso-8859-1")).should == -1 + ("ÄÖÜ".dup.force_encoding("utf-8") <=> "ÄÖÛ".dup.force_encoding("iso-8859-1")).should == 1 end it "returns 0 with identical ASCII-compatible bytes of different encodings" do - ("abc".force_encoding("utf-8") <=> "abc".force_encoding("iso-8859-1")).should == 0 + ("abc".dup.force_encoding("utf-8") <=> "abc".dup.force_encoding("iso-8859-1")).should == 0 end it "compares the indices of the encodings when the strings have identical non-ASCII-compatible bytes" do @@ -77,7 +77,7 @@ describe "String#<=> with String" do end it "returns 0 when comparing 2 empty strings but one is not ASCII-compatible" do - ("" <=> "".force_encoding('iso-2022-jp')).should == 0 + ("" <=> "".dup.force_encoding('iso-2022-jp')).should == 0 end end diff --git a/spec/ruby/core/string/concat_spec.rb b/spec/ruby/core/string/concat_spec.rb index 5f6daadad7..cbd7df54e2 100644 --- a/spec/ruby/core/string/concat_spec.rb +++ b/spec/ruby/core/string/concat_spec.rb @@ -5,21 +5,22 @@ require_relative 'shared/concat' describe "String#concat" do it_behaves_like :string_concat, :concat it_behaves_like :string_concat_encoding, :concat + it_behaves_like :string_concat_type_coercion, :concat it "takes multiple arguments" do - str = "hello " + str = +"hello " str.concat "wo", "", "rld" str.should == "hello world" end it "concatenates the initial value when given arguments contain 2 self" do - str = "hello" + str = +"hello" str.concat str, str str.should == "hellohellohello" end it "returns self when given no arguments" do - str = "hello" + str = +"hello" str.concat.should equal(str) str.should == "hello" end diff --git a/spec/ruby/core/string/count_spec.rb b/spec/ruby/core/string/count_spec.rb index 06ba5a4f0e..e614e901dd 100644 --- a/spec/ruby/core/string/count_spec.rb +++ b/spec/ruby/core/string/count_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../spec_helper' require_relative 'fixtures/classes' diff --git a/spec/ruby/core/string/dedup_spec.rb b/spec/ruby/core/string/dedup_spec.rb index 919d440c51..2b31d80708 100644 --- a/spec/ruby/core/string/dedup_spec.rb +++ b/spec/ruby/core/string/dedup_spec.rb @@ -2,7 +2,5 @@ require_relative '../../spec_helper' require_relative 'shared/dedup' describe 'String#dedup' do - ruby_version_is '3.2'do - it_behaves_like :string_dedup, :dedup - end + it_behaves_like :string_dedup, :dedup end diff --git a/spec/ruby/core/string/delete_prefix_spec.rb b/spec/ruby/core/string/delete_prefix_spec.rb index 17ce18bcca..ee7f044905 100644 --- a/spec/ruby/core/string/delete_prefix_spec.rb +++ b/spec/ruby/core/string/delete_prefix_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -38,18 +39,13 @@ describe "String#delete_prefix" do 'hello'.delete_prefix(o).should == 'o' end - ruby_version_is ''...'3.0' do - it "returns a subclass instance when called on a subclass instance" do - s = StringSpecs::MyString.new('hello') - s.delete_prefix('hell').should be_an_instance_of(StringSpecs::MyString) - end + it "returns a String instance when called on a subclass instance" do + s = StringSpecs::MyString.new('hello') + s.delete_prefix('hell').should be_an_instance_of(String) end - ruby_version_is '3.0' do - it "returns a String instance when called on a subclass instance" do - s = StringSpecs::MyString.new('hello') - s.delete_prefix('hell').should be_an_instance_of(String) - end + it "returns a String in the same encoding as self" do + 'hello'.encode("US-ASCII").delete_prefix('hell').encoding.should == Encoding::US_ASCII end end diff --git a/spec/ruby/core/string/delete_spec.rb b/spec/ruby/core/string/delete_spec.rb index b91e88b76f..6d359776e4 100644 --- a/spec/ruby/core/string/delete_spec.rb +++ b/spec/ruby/core/string/delete_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -84,16 +85,12 @@ describe "String#delete" do -> { "hello world".delete(mock('x')) }.should raise_error(TypeError) end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - StringSpecs::MyString.new("oh no!!!").delete("!").should be_an_instance_of(StringSpecs::MyString) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("oh no!!!").delete("!").should be_an_instance_of(String) end - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new("oh no!!!").delete("!").should be_an_instance_of(String) - end + it "returns a String in the same encoding as self" do + "hello".encode("US-ASCII").delete("lo").encoding.should == Encoding::US_ASCII end end diff --git a/spec/ruby/core/string/delete_suffix_spec.rb b/spec/ruby/core/string/delete_suffix_spec.rb index 0705c73246..1842d75aa5 100644 --- a/spec/ruby/core/string/delete_suffix_spec.rb +++ b/spec/ruby/core/string/delete_suffix_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -38,18 +39,13 @@ describe "String#delete_suffix" do 'hello'.delete_suffix(o).should == 'h' end - ruby_version_is ''...'3.0' do - it "returns a subclass instance when called on a subclass instance" do - s = StringSpecs::MyString.new('hello') - s.delete_suffix('ello').should be_an_instance_of(StringSpecs::MyString) - end + it "returns a String instance when called on a subclass instance" do + s = StringSpecs::MyString.new('hello') + s.delete_suffix('ello').should be_an_instance_of(String) end - ruby_version_is '3.0' do - it "returns a String instance when called on a subclass instance" do - s = StringSpecs::MyString.new('hello') - s.delete_suffix('ello').should be_an_instance_of(String) - end + it "returns a String in the same encoding as self" do + "hello".encode("US-ASCII").delete_suffix("ello").encoding.should == Encoding::US_ASCII end end diff --git a/spec/ruby/core/string/downcase_spec.rb b/spec/ruby/core/string/downcase_spec.rb index f0a15f1e25..2d260f23f1 100644 --- a/spec/ruby/core/string/downcase_spec.rb +++ b/spec/ruby/core/string/downcase_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -8,6 +9,10 @@ describe "String#downcase" do "hello".downcase.should == "hello" end + it "returns a String in the same encoding as self" do + "hELLO".encode("US-ASCII").downcase.encoding.should == Encoding::US_ASCII + end + describe "full Unicode case mapping" do it "works for all of Unicode with no option" do "ÄÖÜ".downcase.should == "äöü" @@ -72,16 +77,8 @@ describe "String#downcase" do -> { "ABC".downcase(:invalid_option) }.should raise_error(ArgumentError) end - ruby_version_is ''...'3.0' do - it "returns a subclass instance for subclasses" do - StringSpecs::MyString.new("FOObar").downcase.should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns a String instance for subclasses" do - StringSpecs::MyString.new("FOObar").downcase.should be_an_instance_of(String) - end + it "returns a String instance for subclasses" do + StringSpecs::MyString.new("FOObar").downcase.should be_an_instance_of(String) end end diff --git a/spec/ruby/core/string/dump_spec.rb b/spec/ruby/core/string/dump_spec.rb index 79a8b55e6d..cab8beff5a 100644 --- a/spec/ruby/core/string/dump_spec.rb +++ b/spec/ruby/core/string/dump_spec.rb @@ -7,16 +7,8 @@ describe "String#dump" do "foo".freeze.dump.should_not.frozen? end - ruby_version_is ''...'3.0' do - it "returns a subclass instance" do - StringSpecs::MyString.new.dump.should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns a String instance" do - StringSpecs::MyString.new.dump.should be_an_instance_of(String) - end + it "returns a String instance" do + StringSpecs::MyString.new.dump.should be_an_instance_of(String) end it "wraps string with \"" do @@ -350,7 +342,7 @@ describe "String#dump" do ].should be_computed_by(:dump) end - it "returns a string with multi-byte UTF-8 characters replaced by \\u{} notation with upper-case hex digits" do + it "returns a string with multi-byte UTF-8 characters less than or equal 0xFFFF replaced by \\uXXXX notation with upper-case hex digits" do [ [0200.chr('utf-8'), '"\u0080"'], [0201.chr('utf-8'), '"\u0081"'], [0202.chr('utf-8'), '"\u0082"'], @@ -382,15 +374,21 @@ describe "String#dump" do [0235.chr('utf-8'), '"\u009D"'], [0236.chr('utf-8'), '"\u009E"'], [0237.chr('utf-8'), '"\u009F"'], + [0177777.chr('utf-8'), '"\uFFFF"'], ].should be_computed_by(:dump) end + it "returns a string with multi-byte UTF-8 characters greater than 0xFFFF replaced by \\u{XXXXXX} notation with upper-case hex digits" do + 0x10000.chr('utf-8').dump.should == '"\u{10000}"' + 0x10FFFF.chr('utf-8').dump.should == '"\u{10FFFF}"' + end + it "includes .force_encoding(name) if the encoding isn't ASCII compatible" do "\u{876}".encode('utf-16be').dump.should.end_with?(".force_encoding(\"UTF-16BE\")") "\u{876}".encode('utf-16le').dump.should.end_with?(".force_encoding(\"UTF-16LE\")") end - it "keeps origin encoding" do + it "returns a String in the same encoding as self" do "foo".encode("ISO-8859-1").dump.encoding.should == Encoding::ISO_8859_1 "foo".encode('windows-1251').dump.encoding.should == Encoding::Windows_1251 1.chr.dump.encoding.should == Encoding::US_ASCII diff --git a/spec/ruby/core/string/dup_spec.rb b/spec/ruby/core/string/dup_spec.rb index eec3cf0a70..073802d84b 100644 --- a/spec/ruby/core/string/dup_spec.rb +++ b/spec/ruby/core/string/dup_spec.rb @@ -51,11 +51,15 @@ describe "String#dup" do end it "does not modify the original setbyte-mutated string when changing dupped string" do - orig = "a" + orig = +"a" orig.setbyte 0, "b".ord copy = orig.dup orig.setbyte 0, "c".ord orig.should == "c" copy.should == "b" end + + it "returns a String in the same encoding as self" do + "hello".encode("US-ASCII").dup.encoding.should == Encoding::US_ASCII + end end diff --git a/spec/ruby/core/string/each_byte_spec.rb b/spec/ruby/core/string/each_byte_spec.rb index e04dca807f..7b3db265ac 100644 --- a/spec/ruby/core/string/each_byte_spec.rb +++ b/spec/ruby/core/string/each_byte_spec.rb @@ -9,26 +9,26 @@ describe "String#each_byte" do end it "keeps iterating from the old position (to new string end) when self changes" do - r = "" - s = "hello world" + r = +"" + s = +"hello world" s.each_byte do |c| r << c s.insert(0, "<>") if r.size < 3 end r.should == "h><>hello world" - r = "" - s = "hello world" + r = +"" + s = +"hello world" s.each_byte { |c| s.slice!(-1); r << c } r.should == "hello " - r = "" - s = "hello world" + r = +"" + s = +"hello world" s.each_byte { |c| s.slice!(0); r << c } r.should == "hlowrd" - r = "" - s = "hello world" + r = +"" + s = +"hello world" s.each_byte { |c| s.slice!(0..-1); r << c } r.should == "h" end diff --git a/spec/ruby/core/string/each_char_spec.rb b/spec/ruby/core/string/each_char_spec.rb index aff98c0a5c..36219f79db 100644 --- a/spec/ruby/core/string/each_char_spec.rb +++ b/spec/ruby/core/string/each_char_spec.rb @@ -1,3 +1,4 @@ +require_relative "../../spec_helper" require_relative 'shared/chars' require_relative 'shared/each_char_without_block' diff --git a/spec/ruby/core/string/each_grapheme_cluster_spec.rb b/spec/ruby/core/string/each_grapheme_cluster_spec.rb index b45d89ecb0..e1fa4ae67b 100644 --- a/spec/ruby/core/string/each_grapheme_cluster_spec.rb +++ b/spec/ruby/core/string/each_grapheme_cluster_spec.rb @@ -1,3 +1,4 @@ +require_relative "../../spec_helper" require_relative 'shared/chars' require_relative 'shared/grapheme_clusters' require_relative 'shared/each_char_without_block' @@ -7,11 +8,9 @@ describe "String#each_grapheme_cluster" do it_behaves_like :string_grapheme_clusters, :each_grapheme_cluster it_behaves_like :string_each_char_without_block, :each_grapheme_cluster - ruby_version_is '3.0' do - it "yields String instances for subclasses" do - a = [] - StringSpecs::MyString.new("abc").each_grapheme_cluster { |s| a << s.class } - a.should == [String, String, String] - end + it "yields String instances for subclasses" do + a = [] + StringSpecs::MyString.new("abc").each_grapheme_cluster { |s| a << s.class } + a.should == [String, String, String] end end diff --git a/spec/ruby/core/string/element_set_spec.rb b/spec/ruby/core/string/element_set_spec.rb index 881b4343d4..e7599f832c 100644 --- a/spec/ruby/core/string/element_set_spec.rb +++ b/spec/ruby/core/string/element_set_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -357,11 +358,11 @@ describe "String#[]= with a Range index" do end it "raises a RangeError if negative Range begin is out of range" do - -> { "abc"[-4..-2] = "x" }.should raise_error(RangeError) + -> { "abc"[-4..-2] = "x" }.should raise_error(RangeError, "-4..-2 out of range") end it "raises a RangeError if positive Range begin is greater than String size" do - -> { "abc"[4..2] = "x" }.should raise_error(RangeError) + -> { "abc"[4..2] = "x" }.should raise_error(RangeError, "4..2 out of range") end it "uses the Range end as an index rather than a count" do diff --git a/spec/ruby/core/string/encode_spec.rb b/spec/ruby/core/string/encode_spec.rb index 5604ab7210..cd449498a3 100644 --- a/spec/ruby/core/string/encode_spec.rb +++ b/spec/ruby/core/string/encode_spec.rb @@ -34,8 +34,8 @@ describe "String#encode" do it "encodes an ascii substring of a binary string to UTF-8" do x82 = [0x82].pack('C') - str = "#{x82}foo".force_encoding("binary")[1..-1].encode("utf-8") - str.should == "foo".force_encoding("utf-8") + str = "#{x82}foo".dup.force_encoding("binary")[1..-1].encode("utf-8") + str.should == "foo".dup.force_encoding("utf-8") str.encoding.should equal(Encoding::UTF_8) end end @@ -49,7 +49,7 @@ describe "String#encode" do end it "round trips a String" do - str = "abc def".force_encoding Encoding::US_ASCII + str = "abc def".dup.force_encoding Encoding::US_ASCII str.encode("utf-32be").encode("ascii").should == "abc def" end end @@ -61,10 +61,22 @@ describe "String#encode" do str.encode(invalid: :replace).should_not equal(str) end - it "normalizes newlines" do - "\r\nfoo".encode(universal_newline: true).should == "\nfoo" + it "normalizes newlines with cr_newline option" do + "\r\nfoo".encode(cr_newline: true).should == "\r\rfoo" + "\rfoo".encode(cr_newline: true).should == "\rfoo" + "\nfoo".encode(cr_newline: true).should == "\rfoo" + end + + it "normalizes newlines with crlf_newline option" do + "\r\nfoo".encode(crlf_newline: true).should == "\r\r\nfoo" + "\rfoo".encode(crlf_newline: true).should == "\rfoo" + "\nfoo".encode(crlf_newline: true).should == "\r\nfoo" + end + it "normalizes newlines with universal_newline option" do + "\r\nfoo".encode(universal_newline: true).should == "\nfoo" "\rfoo".encode(universal_newline: true).should == "\nfoo" + "\nfoo".encode(universal_newline: true).should == "\nfoo" end it "replaces invalid encoding in source with default replacement" do @@ -79,6 +91,10 @@ describe "String#encode" do encoded.encode("UTF-8").should == "ちfoofoo" end + it "replace multiple invalid bytes at the end with a single replacement character" do + "\xE3\x81\x93\xE3\x81".encode("UTF-8", invalid: :replace).should == "\u3053\ufffd" + end + it "replaces invalid encoding in source using a specified replacement even when a fallback is given" do encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "foo", fallback: -> c { "bar" }) encoded.should == "\u3061foofoo".encode("UTF-16LE") @@ -118,8 +134,7 @@ describe "String#encode" do describe "when passed to, from" do it "returns a copy in the destination encoding when both encodings are the same" do - str = "あ" - str.force_encoding("binary") + str = "あ".dup.force_encoding("binary") encoded = str.encode("utf-8", "utf-8") encoded.should_not equal(str) @@ -151,8 +166,7 @@ describe "String#encode" do end it "returns a copy in the destination encoding when both encodings are the same" do - str = "あ" - str.force_encoding("binary") + str = "あ".dup.force_encoding("binary") encoded = str.encode("utf-8", "utf-8", invalid: :replace) encoded.should_not equal(str) @@ -187,13 +201,13 @@ describe "String#encode!" do describe "when passed no options" do it "returns self when Encoding.default_internal is nil" do Encoding.default_internal = nil - str = "あ" + str = +"あ" str.encode!.should equal(str) end it "returns self for a ASCII-only String when Encoding.default_internal is nil" do Encoding.default_internal = nil - str = "abc" + str = +"abc" str.encode!.should equal(str) end end @@ -201,14 +215,14 @@ describe "String#encode!" do describe "when passed options" do it "returns self for ASCII-only String when Encoding.default_internal is nil" do Encoding.default_internal = nil - str = "abc" + str = +"abc" str.encode!(invalid: :replace).should equal(str) end end describe "when passed to encoding" do it "returns self" do - str = "abc" + str = +"abc" result = str.encode!(Encoding::BINARY) result.encoding.should equal(Encoding::BINARY) result.should equal(str) @@ -217,7 +231,7 @@ describe "String#encode!" do describe "when passed to, from" do it "returns self" do - str = "ああ" + str = +"ああ" result = str.encode!("euc-jp", "utf-8") result.encoding.should equal(Encoding::EUC_JP) result.should equal(str) diff --git a/spec/ruby/core/string/encoding_spec.rb b/spec/ruby/core/string/encoding_spec.rb index 574a1e2f92..f6e8fd3470 100644 --- a/spec/ruby/core/string/encoding_spec.rb +++ b/spec/ruby/core/string/encoding_spec.rb @@ -14,11 +14,11 @@ describe "String#encoding" do end it "returns the given encoding if #force_encoding has been called" do - "a".force_encoding(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + "a".dup.force_encoding(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS end it "returns the given encoding if #encode!has been called" do - "a".encode!(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + "a".dup.encode!(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS end end @@ -108,13 +108,13 @@ describe "String#encoding for Strings with \\u escapes" do end it "returns the given encoding if #force_encoding has been called" do - "\u{20}".force_encoding(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS - "\u{2020}".force_encoding(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + "\u{20}".dup.force_encoding(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + "\u{2020}".dup.force_encoding(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS end it "returns the given encoding if #encode!has been called" do - "\u{20}".encode!(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS - "\u{2020}".encode!(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + "\u{20}".dup.encode!(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + "\u{2020}".dup.encode!(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS end end @@ -173,16 +173,12 @@ describe "String#encoding for Strings with \\x escapes" do end it "returns the given encoding if #force_encoding has been called" do - x50 = "\x50" - x50.force_encoding(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS - xD4 = [212].pack('C') - xD4.force_encoding(Encoding::ISO_8859_9).encoding.should == Encoding::ISO_8859_9 + "\x50".dup.force_encoding(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + [212].pack('C').force_encoding(Encoding::ISO_8859_9).encoding.should == Encoding::ISO_8859_9 end it "returns the given encoding if #encode!has been called" do - x50 = "\x50" - x50.encode!(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS - x00 = "x\00" - x00.encode!(Encoding::UTF_8).encoding.should == Encoding::UTF_8 + "\x50".dup.encode!(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + "x\00".dup.encode!(Encoding::UTF_8).encoding.should == Encoding::UTF_8 end end diff --git a/spec/ruby/core/string/fixtures/to_c.rb b/spec/ruby/core/string/fixtures/to_c.rb new file mode 100644 index 0000000000..7776933263 --- /dev/null +++ b/spec/ruby/core/string/fixtures/to_c.rb @@ -0,0 +1,5 @@ +module StringSpecs + def self.to_c_method(string) + string.to_c + end +end diff --git a/spec/ruby/core/string/fixtures/utf-8-encoding.rb b/spec/ruby/core/string/fixtures/utf-8-encoding.rb deleted file mode 100644 index fd243ec522..0000000000 --- a/spec/ruby/core/string/fixtures/utf-8-encoding.rb +++ /dev/null @@ -1,7 +0,0 @@ -# -*- encoding: utf-8 -*- -module StringSpecs - class UTF8Encoding - def self.source_encoding; __ENCODING__; end - def self.egrave; "é"; end - end -end diff --git a/spec/ruby/core/string/force_encoding_spec.rb b/spec/ruby/core/string/force_encoding_spec.rb index f37aaf9eb4..2259dcf3cf 100644 --- a/spec/ruby/core/string/force_encoding_spec.rb +++ b/spec/ruby/core/string/force_encoding_spec.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: false require_relative '../../spec_helper' describe "String#force_encoding" do diff --git a/spec/ruby/core/string/freeze_spec.rb b/spec/ruby/core/string/freeze_spec.rb index 04d1e9513c..2e8e70386d 100644 --- a/spec/ruby/core/string/freeze_spec.rb +++ b/spec/ruby/core/string/freeze_spec.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: false require_relative '../../spec_helper' describe "String#freeze" do diff --git a/spec/ruby/core/string/grapheme_clusters_spec.rb b/spec/ruby/core/string/grapheme_clusters_spec.rb index 3046265a12..380a245083 100644 --- a/spec/ruby/core/string/grapheme_clusters_spec.rb +++ b/spec/ruby/core/string/grapheme_clusters_spec.rb @@ -1,3 +1,4 @@ +require_relative "../../spec_helper" require_relative 'shared/chars' require_relative 'shared/grapheme_clusters' diff --git a/spec/ruby/core/string/gsub_spec.rb b/spec/ruby/core/string/gsub_spec.rb index 3211ebbd0a..0d9f32eca2 100644 --- a/spec/ruby/core/string/gsub_spec.rb +++ b/spec/ruby/core/string/gsub_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -192,26 +193,13 @@ describe "String#gsub with pattern and replacement" do -> { "hello".gsub(/[aeiou]/, nil) }.should raise_error(TypeError) end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - StringSpecs::MyString.new("").gsub(//, "").should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("").gsub(/foo/, "").should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("foo").gsub(/foo/, "").should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("foo").gsub("foo", "").should be_an_instance_of(StringSpecs::MyString) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("").gsub(//, "").should be_an_instance_of(String) + StringSpecs::MyString.new("").gsub(/foo/, "").should be_an_instance_of(String) + StringSpecs::MyString.new("foo").gsub(/foo/, "").should be_an_instance_of(String) + StringSpecs::MyString.new("foo").gsub("foo", "").should be_an_instance_of(String) end - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new("").gsub(//, "").should be_an_instance_of(String) - StringSpecs::MyString.new("").gsub(/foo/, "").should be_an_instance_of(String) - StringSpecs::MyString.new("foo").gsub(/foo/, "").should be_an_instance_of(String) - StringSpecs::MyString.new("foo").gsub("foo", "").should be_an_instance_of(String) - end - end - - # Note: $~ cannot be tested because mspec messes with it - it "sets $~ to MatchData of last match and nil when there's none" do 'hello.'.gsub('hello', 'x') $~[0].should == 'hello' @@ -225,6 +213,18 @@ describe "String#gsub with pattern and replacement" do 'hello.'.gsub(/not/, 'x') $~.should == nil end + + it "handles a pattern in a superset encoding" do + result = 'abc'.force_encoding(Encoding::US_ASCII).gsub('é', 'è') + result.should == 'abc' + result.encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + result = 'été'.gsub('t'.force_encoding(Encoding::US_ASCII), 'u') + result.should == 'éué' + result.encoding.should == Encoding::UTF_8 + end end describe "String#gsub with pattern and Hash" do @@ -521,6 +521,27 @@ describe "String#gsub! with pattern and replacement" do -> { s.gsub!(/e/, "e") }.should raise_error(FrozenError) -> { s.gsub!(/[aeiou]/, '*') }.should raise_error(FrozenError) end + + it "handles a pattern in a superset encoding" do + string = 'abc'.force_encoding(Encoding::US_ASCII) + + result = string.gsub!('é', 'è') + + result.should == nil + string.should == 'abc' + string.encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + string = 'été' + pattern = 't'.force_encoding(Encoding::US_ASCII) + + result = string.gsub!(pattern, 'u') + + result.should == string + string.should == 'éué' + string.encoding.should == Encoding::UTF_8 + end end describe "String#gsub! with pattern and block" do diff --git a/spec/ruby/core/string/include_spec.rb b/spec/ruby/core/string/include_spec.rb index 23e1e134ec..9781140a55 100644 --- a/spec/ruby/core/string/include_spec.rb +++ b/spec/ruby/core/string/include_spec.rb @@ -15,16 +15,16 @@ describe "String#include? with String" do it "returns true if both strings are empty" do "".should.include?("") - "".force_encoding("EUC-JP").should.include?("") - "".should.include?("".force_encoding("EUC-JP")) - "".force_encoding("EUC-JP").should.include?("".force_encoding("EUC-JP")) + "".dup.force_encoding("EUC-JP").should.include?("") + "".should.include?("".dup.force_encoding("EUC-JP")) + "".dup.force_encoding("EUC-JP").should.include?("".dup.force_encoding("EUC-JP")) end it "returns true if the RHS is empty" do "a".should.include?("") - "a".force_encoding("EUC-JP").should.include?("") - "a".should.include?("".force_encoding("EUC-JP")) - "a".force_encoding("EUC-JP").should.include?("".force_encoding("EUC-JP")) + "a".dup.force_encoding("EUC-JP").should.include?("") + "a".should.include?("".dup.force_encoding("EUC-JP")) + "a".dup.force_encoding("EUC-JP").should.include?("".dup.force_encoding("EUC-JP")) end it "tries to convert other to string using to_str" do diff --git a/spec/ruby/core/string/index_spec.rb b/spec/ruby/core/string/index_spec.rb index 5d77a88e4e..835263a2cd 100644 --- a/spec/ruby/core/string/index_spec.rb +++ b/spec/ruby/core/string/index_spec.rb @@ -159,6 +159,21 @@ describe "String#index with String" do "あれ".index char end.should raise_error(Encoding::CompatibilityError) end + + it "handles a substring in a superset encoding" do + 'abc'.dup.force_encoding(Encoding::US_ASCII).index('é').should == nil + end + + it "handles a substring in a subset encoding" do + 'été'.index('t'.dup.force_encoding(Encoding::US_ASCII)).should == 1 + end + + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + str = 'abc'.dup.force_encoding("ISO-2022-JP") + pattern = 'b'.dup.force_encoding("EUC-JP") + + -> { str.index(pattern) }.should raise_error(Encoding::CompatibilityError, "incompatible character encodings: ISO-2022-JP and EUC-JP") + end end describe "String#index with Regexp" do @@ -216,6 +231,17 @@ describe "String#index with Regexp" do $~.should == nil end + ruby_bug "#20421", ""..."3.3" do + it "always clear $~" do + "a".index(/a/) + $~.should_not == nil + + string = "blablabla" + string.index(/bla/, string.length + 1) + $~.should == nil + end + end + it "starts the search at the given offset" do "blablabla".index(/.{0}/, 5).should == 5 "blablabla".index(/.{1}/, 5).should == 5 @@ -304,6 +330,17 @@ describe "String#index with Regexp" do "われわわれ".index(/わ/, 3).should == 3 end + ruby_bug "#19763", ""..."3.3.0" do + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + re = Regexp.new "れ".encode(Encoding::EUC_JP) + -> do + "あれ".index re + end.should raise_error(Encoding::CompatibilityError, "incompatible encoding regexp match (EUC-JP regexp with UTF-8 string)") + end + end + + # The exception message was incorrectly "incompatible character encodings: UTF-8 and EUC-JP" before 3.3.0 + # Still test that the right exception class is used before that. it "raises an Encoding::CompatibilityError if the encodings are incompatible" do re = Regexp.new "れ".encode(Encoding::EUC_JP) -> do diff --git a/spec/ruby/core/string/insert_spec.rb b/spec/ruby/core/string/insert_spec.rb index 0c87df3a95..483f3c9367 100644 --- a/spec/ruby/core/string/insert_spec.rb +++ b/spec/ruby/core/string/insert_spec.rb @@ -1,5 +1,5 @@ # -*- encoding: utf-8 -*- - +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' diff --git a/spec/ruby/core/string/inspect_spec.rb b/spec/ruby/core/string/inspect_spec.rb index 8bf3d3161f..15db06c7f5 100644 --- a/spec/ruby/core/string/inspect_spec.rb +++ b/spec/ruby/core/string/inspect_spec.rb @@ -327,7 +327,7 @@ describe "String#inspect" do end it "works for broken US-ASCII strings" do - s = "©".force_encoding("US-ASCII") + s = "©".dup.force_encoding("US-ASCII") s.inspect.should == '"\xC2\xA9"' end diff --git a/spec/ruby/core/string/lines_spec.rb b/spec/ruby/core/string/lines_spec.rb index ad4b119074..40ab5f71d8 100644 --- a/spec/ruby/core/string/lines_spec.rb +++ b/spec/ruby/core/string/lines_spec.rb @@ -1,7 +1,6 @@ require_relative '../../spec_helper' require_relative 'fixtures/classes' require_relative 'shared/each_line' -require_relative 'shared/each_line_without_block' describe "String#lines" do it_behaves_like :string_each_line, :lines diff --git a/spec/ruby/core/string/ljust_spec.rb b/spec/ruby/core/string/ljust_spec.rb index 9a25d3abd4..47324c59d2 100644 --- a/spec/ruby/core/string/ljust_spec.rb +++ b/spec/ruby/core/string/ljust_spec.rb @@ -64,31 +64,18 @@ describe "String#ljust with length, padding" do -> { "hello".ljust(10, '') }.should raise_error(ArgumentError) end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on subclasses" do - StringSpecs::MyString.new("").ljust(10).should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("foo").ljust(10).should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("foo").ljust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(StringSpecs::MyString) - - "".ljust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - "foo".ljust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - end - end + it "returns String instances when called on subclasses" do + StringSpecs::MyString.new("").ljust(10).should be_an_instance_of(String) + StringSpecs::MyString.new("foo").ljust(10).should be_an_instance_of(String) + StringSpecs::MyString.new("foo").ljust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - ruby_version_is '3.0' do - it "returns String instances when called on subclasses" do - StringSpecs::MyString.new("").ljust(10).should be_an_instance_of(String) - StringSpecs::MyString.new("foo").ljust(10).should be_an_instance_of(String) - StringSpecs::MyString.new("foo").ljust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - - "".ljust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - "foo".ljust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - end + "".ljust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) + "foo".ljust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) end describe "with width" do it "returns a String in the same encoding as the original" do - str = "abc".force_encoding Encoding::IBM437 + str = "abc".dup.force_encoding Encoding::IBM437 result = str.ljust 5 result.should == "abc " result.encoding.should equal(Encoding::IBM437) @@ -97,7 +84,7 @@ describe "String#ljust with length, padding" do describe "with width, pattern" do it "returns a String in the compatible encoding" do - str = "abc".force_encoding Encoding::IBM437 + str = "abc".dup.force_encoding Encoding::IBM437 result = str.ljust 5, "あ" result.should == "abcああ" result.encoding.should equal(Encoding::UTF_8) diff --git a/spec/ruby/core/string/lstrip_spec.rb b/spec/ruby/core/string/lstrip_spec.rb index 75434613f1..c83650207e 100644 --- a/spec/ruby/core/string/lstrip_spec.rb +++ b/spec/ruby/core/string/lstrip_spec.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' require_relative 'shared/strip' @@ -6,11 +7,11 @@ describe "String#lstrip" do it_behaves_like :string_strip, :lstrip it "returns a copy of self with leading whitespace removed" do - " hello ".lstrip.should == "hello " - " hello world ".lstrip.should == "hello world " - "\n\r\t\n\v\r hello world ".lstrip.should == "hello world " - "hello".lstrip.should == "hello" - " こにちわ".lstrip.should == "こにちわ" + " hello ".lstrip.should == "hello " + " hello world ".lstrip.should == "hello world " + "\n\r\t\n\v\r hello world ".lstrip.should == "hello world " + "hello".lstrip.should == "hello" + " こにちわ".lstrip.should == "こにちわ" end it "works with lazy substrings" do @@ -20,11 +21,9 @@ describe "String#lstrip" do " こにちわ "[1...-1].lstrip.should == "こにちわ" end - ruby_version_is '3.0' do - it "strips leading \\0" do - "\x00hello".lstrip.should == "hello" - "\000 \000hello\000 \000".lstrip.should == "hello\000 \000" - end + it "strips leading \\0" do + "\x00hello".lstrip.should == "hello" + "\000 \000hello\000 \000".lstrip.should == "hello\000 \000" end end @@ -47,12 +46,10 @@ describe "String#lstrip!" do " ".lstrip.should == "" end - ruby_version_is '3.0' do - it "removes leading NULL bytes and whitespace" do - a = "\000 \000hello\000 \000" - a.lstrip! - a.should == "hello\000 \000" - end + it "removes leading NULL bytes and whitespace" do + a = "\000 \000hello\000 \000" + a.lstrip! + a.should == "hello\000 \000" end it "raises a FrozenError on a frozen instance that is modified" do diff --git a/spec/ruby/core/string/modulo_spec.rb b/spec/ruby/core/string/modulo_spec.rb index bf96a82874..46e0aa0f36 100644 --- a/spec/ruby/core/string/modulo_spec.rb +++ b/spec/ruby/core/string/modulo_spec.rb @@ -55,33 +55,48 @@ describe "String#%" do -> { ("foo%" % [])}.should raise_error(ArgumentError) end - it "formats single % character before a newline as literal %" do - ("%\n" % []).should == "%\n" - ("foo%\n" % []).should == "foo%\n" - ("%\n.3f" % 1.2).should == "%\n.3f" - end + ruby_version_is ""..."3.4" do + it "formats single % character before a newline as literal %" do + ("%\n" % []).should == "%\n" + ("foo%\n" % []).should == "foo%\n" + ("%\n.3f" % 1.2).should == "%\n.3f" + end - it "formats single % character before a NUL as literal %" do - ("%\0" % []).should == "%\0" - ("foo%\0" % []).should == "foo%\0" - ("%\0.3f" % 1.2).should == "%\0.3f" - end + it "formats single % character before a NUL as literal %" do + ("%\0" % []).should == "%\0" + ("foo%\0" % []).should == "foo%\0" + ("%\0.3f" % 1.2).should == "%\0.3f" + end - it "raises an error if single % appears anywhere else" do - -> { (" % " % []) }.should raise_error(ArgumentError) - -> { ("foo%quux" % []) }.should raise_error(ArgumentError) - end + it "raises an error if single % appears anywhere else" do + -> { (" % " % []) }.should raise_error(ArgumentError) + -> { ("foo%quux" % []) }.should raise_error(ArgumentError) + end - it "raises an error if NULL or \\n appear anywhere else in the format string" do - begin - old_debug, $DEBUG = $DEBUG, false + it "raises an error if NULL or \\n appear anywhere else in the format string" do + begin + old_debug, $DEBUG = $DEBUG, false + + -> { "%.\n3f" % 1.2 }.should raise_error(ArgumentError) + -> { "%.3\nf" % 1.2 }.should raise_error(ArgumentError) + -> { "%.\03f" % 1.2 }.should raise_error(ArgumentError) + -> { "%.3\0f" % 1.2 }.should raise_error(ArgumentError) + ensure + $DEBUG = old_debug + end + end + end + ruby_version_is "3.4" do + it "raises an ArgumentError if % is not followed by a conversion specifier" do + -> { "%" % [] }.should raise_error(ArgumentError) + -> { "%\n" % [] }.should raise_error(ArgumentError) + -> { "%\0" % [] }.should raise_error(ArgumentError) + -> { " % " % [] }.should raise_error(ArgumentError) -> { "%.\n3f" % 1.2 }.should raise_error(ArgumentError) -> { "%.3\nf" % 1.2 }.should raise_error(ArgumentError) -> { "%.\03f" % 1.2 }.should raise_error(ArgumentError) -> { "%.3\0f" % 1.2 }.should raise_error(ArgumentError) - ensure - $DEBUG = old_debug end end @@ -125,8 +140,16 @@ describe "String#%" do end end - it "replaces trailing absolute argument specifier without type with percent sign" do - ("hello %1$" % "foo").should == "hello %" + ruby_version_is ""..."3.4" do + it "replaces trailing absolute argument specifier without type with percent sign" do + ("hello %1$" % "foo").should == "hello %" + end + end + + ruby_version_is "3.4" do + it "raises an ArgumentError if absolute argument specifier is followed by a conversion specifier" do + -> { "hello %1$" % "foo" }.should raise_error(ArgumentError) + end end it "raises an ArgumentError when given invalid argument specifiers" do @@ -368,16 +391,8 @@ describe "String#%" do ("%c" % 'A').should == "A" end - ruby_version_is ""..."3.2" do - it "raises an exception for multiple character strings as argument for %c" do - -> { "%c" % 'AA' }.should raise_error(ArgumentError) - end - end - - ruby_version_is "3.2" do - it "supports only the first character as argument for %c" do - ("%c" % 'AA').should == "A" - end + it "supports only the first character as argument for %c" do + ("%c" % 'AA').should == "A" end it "calls to_str on argument for %c formats" do @@ -547,7 +562,7 @@ describe "String#%" do ("%1$p" % [10, 5]).should == "10" ("%-22p" % 10).should == "10 " ("%*p" % [10, 10]).should == " 10" - ("%p" % {capture: 1}).should == "{:capture=>1}" + ("%p" % {capture: 1}).should == {capture: 1}.inspect ("%p" % "str").should == "\"str\"" end @@ -726,6 +741,11 @@ describe "String#%" do (format % "-10.4e-20").should == (format % -10.4e-20) (format % ".5").should == (format % 0.5) (format % "-.5").should == (format % -0.5) + + ruby_version_is "3.4" do + (format % "10.").should == (format % 10) + end + # Something's strange with this spec: # it works just fine in individual mode, but not when run as part of a group (format % "10_1_0.5_5_5").should == (format % 1010.555) @@ -735,7 +755,6 @@ describe "String#%" do -> { format % "" }.should raise_error(ArgumentError) -> { format % "x" }.should raise_error(ArgumentError) -> { format % "." }.should raise_error(ArgumentError) - -> { format % "10." }.should raise_error(ArgumentError) -> { format % "5x" }.should raise_error(ArgumentError) -> { format % "0b1" }.should raise_error(ArgumentError) -> { format % "10e10.5" }.should raise_error(ArgumentError) diff --git a/spec/ruby/core/string/ord_spec.rb b/spec/ruby/core/string/ord_spec.rb index 4cf26990fe..35af3b5458 100644 --- a/spec/ruby/core/string/ord_spec.rb +++ b/spec/ruby/core/string/ord_spec.rb @@ -27,7 +27,7 @@ describe "String#ord" do end it "raises ArgumentError if the character is broken" do - s = "©".force_encoding("US-ASCII") + s = "©".dup.force_encoding("US-ASCII") -> { s.ord }.should raise_error(ArgumentError, "invalid byte sequence in US-ASCII") end end diff --git a/spec/ruby/core/string/partition_spec.rb b/spec/ruby/core/string/partition_spec.rb index 98311f2be4..d5370dcc73 100644 --- a/spec/ruby/core/string/partition_spec.rb +++ b/spec/ruby/core/string/partition_spec.rb @@ -38,4 +38,26 @@ describe "String#partition with String" do it "takes precedence over a given block" do "hello world".partition("o") { true }.should == ["hell", "o", " world"] end + + it "handles a pattern in a superset encoding" do + string = "hello".dup.force_encoding(Encoding::US_ASCII) + + result = string.partition("é") + + result.should == ["hello", "", ""] + result[0].encoding.should == Encoding::US_ASCII + result[1].encoding.should == Encoding::US_ASCII + result[2].encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + pattern = "o".dup.force_encoding(Encoding::US_ASCII) + + result = "héllo world".partition(pattern) + + result.should == ["héll", "o", " world"] + result[0].encoding.should == Encoding::UTF_8 + result[1].encoding.should == Encoding::US_ASCII + result[2].encoding.should == Encoding::UTF_8 + end end diff --git a/spec/ruby/core/string/plus_spec.rb b/spec/ruby/core/string/plus_spec.rb index 5ff198f07e..9da17451c6 100644 --- a/spec/ruby/core/string/plus_spec.rb +++ b/spec/ruby/core/string/plus_spec.rb @@ -3,6 +3,9 @@ require_relative 'fixtures/classes' require_relative 'shared/concat' describe "String#+" do + it_behaves_like :string_concat_encoding, :+ + it_behaves_like :string_concat_type_coercion, :+ + it "returns a new string containing the given string concatenated to self" do ("" + "").should == "" ("" + "Hello").should == "Hello" @@ -31,6 +34,4 @@ describe "String#+" do ("hello" + StringSpecs::MyString.new("foo")).should be_an_instance_of(String) ("hello" + StringSpecs::MyString.new("")).should be_an_instance_of(String) end - - it_behaves_like :string_concat_encoding, :+ end diff --git a/spec/ruby/core/string/prepend_spec.rb b/spec/ruby/core/string/prepend_spec.rb index a0393d4760..5248ea8056 100644 --- a/spec/ruby/core/string/prepend_spec.rb +++ b/spec/ruby/core/string/prepend_spec.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' diff --git a/spec/ruby/core/string/reverse_spec.rb b/spec/ruby/core/string/reverse_spec.rb index 4206b8af90..aa6abe6036 100644 --- a/spec/ruby/core/string/reverse_spec.rb +++ b/spec/ruby/core/string/reverse_spec.rb @@ -1,4 +1,5 @@ # encoding: utf-8 +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -10,20 +11,10 @@ describe "String#reverse" do "".reverse.should == "" end - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new("stressed").reverse.should be_an_instance_of(String) - StringSpecs::MyString.new("m").reverse.should be_an_instance_of(String) - StringSpecs::MyString.new("").reverse.should be_an_instance_of(String) - end - end - - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - StringSpecs::MyString.new("stressed").reverse.should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("m").reverse.should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("").reverse.should be_an_instance_of(StringSpecs::MyString) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("stressed").reverse.should be_an_instance_of(String) + StringSpecs::MyString.new("m").reverse.should be_an_instance_of(String) + StringSpecs::MyString.new("").reverse.should be_an_instance_of(String) end it "reverses a string with multi byte characters" do @@ -37,6 +28,10 @@ describe "String#reverse" do str.reverse.should == "體黑正\xDE\xDF軟微" end + + it "returns a String in the same encoding as self" do + "stressed".encode("US-ASCII").reverse.encoding.should == Encoding::US_ASCII + end end describe "String#reverse!" do diff --git a/spec/ruby/core/string/rindex_spec.rb b/spec/ruby/core/string/rindex_spec.rb index a3b437a1e4..0863a9c3be 100644 --- a/spec/ruby/core/string/rindex_spec.rb +++ b/spec/ruby/core/string/rindex_spec.rb @@ -1,7 +1,6 @@ # -*- encoding: utf-8 -*- require_relative '../../spec_helper' require_relative 'fixtures/classes' -require_relative 'fixtures/utf-8-encoding' describe "String#rindex with object" do it "raises a TypeError if obj isn't a String or Regexp" do @@ -196,6 +195,21 @@ describe "String#rindex with String" do it "raises a TypeError when given offset is nil" do -> { "str".rindex("st", nil) }.should raise_error(TypeError) end + + it "handles a substring in a superset encoding" do + 'abc'.dup.force_encoding(Encoding::US_ASCII).rindex('é').should == nil + end + + it "handles a substring in a subset encoding" do + 'été'.rindex('t'.dup.force_encoding(Encoding::US_ASCII)).should == 1 + end + + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + str = 'abc'.dup.force_encoding("ISO-2022-JP") + pattern = 'b'.dup.force_encoding("EUC-JP") + + -> { str.rindex(pattern) }.should raise_error(Encoding::CompatibilityError, "incompatible character encodings: ISO-2022-JP and EUC-JP") + end end describe "String#rindex with Regexp" do @@ -357,14 +371,14 @@ describe "String#rindex with Regexp" do end it "returns the character index before the finish" do - "ありがりがとう".rindex("が", 3).should == 2 - "ありがりがとう".rindex(/が/, 3).should == 2 + "ありがりがとう".rindex("が", 3).should == 2 + "ありがりがとう".rindex(/が/, 3).should == 2 end it "raises an Encoding::CompatibilityError if the encodings are incompatible" do re = Regexp.new "れ".encode(Encoding::EUC_JP) -> do "あれ".rindex re - end.should raise_error(Encoding::CompatibilityError) + end.should raise_error(Encoding::CompatibilityError, "incompatible encoding regexp match (EUC-JP regexp with UTF-8 string)") end end diff --git a/spec/ruby/core/string/rjust_spec.rb b/spec/ruby/core/string/rjust_spec.rb index d067b7bdb3..4ad3e54aea 100644 --- a/spec/ruby/core/string/rjust_spec.rb +++ b/spec/ruby/core/string/rjust_spec.rb @@ -64,31 +64,18 @@ describe "String#rjust with length, padding" do -> { "hello".rjust(10, '') }.should raise_error(ArgumentError) end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on subclasses" do - StringSpecs::MyString.new("").rjust(10).should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("foo").rjust(10).should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("foo").rjust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(StringSpecs::MyString) - - "".rjust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - "foo".rjust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - end - end + it "returns String instances when called on subclasses" do + StringSpecs::MyString.new("").rjust(10).should be_an_instance_of(String) + StringSpecs::MyString.new("foo").rjust(10).should be_an_instance_of(String) + StringSpecs::MyString.new("foo").rjust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - ruby_version_is '3.0' do - it "returns String instances when called on subclasses" do - StringSpecs::MyString.new("").rjust(10).should be_an_instance_of(String) - StringSpecs::MyString.new("foo").rjust(10).should be_an_instance_of(String) - StringSpecs::MyString.new("foo").rjust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - - "".rjust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - "foo".rjust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) - end + "".rjust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) + "foo".rjust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) end describe "with width" do it "returns a String in the same encoding as the original" do - str = "abc".force_encoding Encoding::IBM437 + str = "abc".dup.force_encoding Encoding::IBM437 result = str.rjust 5 result.should == " abc" result.encoding.should equal(Encoding::IBM437) @@ -97,7 +84,7 @@ describe "String#rjust with length, padding" do describe "with width, pattern" do it "returns a String in the compatible encoding" do - str = "abc".force_encoding Encoding::IBM437 + str = "abc".dup.force_encoding Encoding::IBM437 result = str.rjust 5, "あ" result.should == "ああabc" result.encoding.should equal(Encoding::UTF_8) diff --git a/spec/ruby/core/string/rpartition_spec.rb b/spec/ruby/core/string/rpartition_spec.rb index c8f9afaee9..cef0384c73 100644 --- a/spec/ruby/core/string/rpartition_spec.rb +++ b/spec/ruby/core/string/rpartition_spec.rb @@ -46,4 +46,26 @@ describe "String#rpartition with String" do ->{ "hello".rpartition(5) }.should raise_error(TypeError) ->{ "hello".rpartition(nil) }.should raise_error(TypeError) end + + it "handles a pattern in a superset encoding" do + string = "hello".dup.force_encoding(Encoding::US_ASCII) + + result = string.rpartition("é") + + result.should == ["", "", "hello"] + result[0].encoding.should == Encoding::US_ASCII + result[1].encoding.should == Encoding::US_ASCII + result[2].encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + pattern = "o".dup.force_encoding(Encoding::US_ASCII) + + result = "héllo world".rpartition(pattern) + + result.should == ["héllo w", "o", "rld"] + result[0].encoding.should == Encoding::UTF_8 + result[1].encoding.should == Encoding::US_ASCII + result[2].encoding.should == Encoding::UTF_8 + end end diff --git a/spec/ruby/core/string/rstrip_spec.rb b/spec/ruby/core/string/rstrip_spec.rb index 4332b33b20..55773f5238 100644 --- a/spec/ruby/core/string/rstrip_spec.rb +++ b/spec/ruby/core/string/rstrip_spec.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' require_relative 'shared/strip' @@ -51,12 +52,10 @@ describe "String#rstrip!" do " ".rstrip.should == "" end - ruby_version_is '3.0' do - it "removes trailing NULL bytes and whitespace" do - a = "\000 goodbye \000" - a.rstrip! - a.should == "\000 goodbye" - end + it "removes trailing NULL bytes and whitespace" do + a = "\000 goodbye \000" + a.rstrip! + a.should == "\000 goodbye" end it "raises a FrozenError on a frozen instance that is modified" do @@ -69,13 +68,13 @@ describe "String#rstrip!" do -> { "".freeze.rstrip! }.should raise_error(FrozenError) end - it "raises an ArgumentError if the last non-space codepoint is invalid" do + it "raises an Encoding::CompatibilityError if the last non-space codepoint is invalid" do s = "abc\xDF".force_encoding(Encoding::UTF_8) s.valid_encoding?.should be_false - -> { s.rstrip! }.should raise_error(ArgumentError) + -> { s.rstrip! }.should raise_error(Encoding::CompatibilityError) s = "abc\xDF ".force_encoding(Encoding::UTF_8) s.valid_encoding?.should be_false - -> { s.rstrip! }.should raise_error(ArgumentError) + -> { s.rstrip! }.should raise_error(Encoding::CompatibilityError) end end diff --git a/spec/ruby/core/string/scan_spec.rb b/spec/ruby/core/string/scan_spec.rb index ab73f5747b..bbe843b591 100644 --- a/spec/ruby/core/string/scan_spec.rb +++ b/spec/ruby/core/string/scan_spec.rb @@ -69,6 +69,12 @@ describe "String#scan" do it "does not raise any errors when passed a multi-byte string" do "あああaaaあああ".scan("あああ").should == ["あああ", "あああ"] end + + it "returns Strings in the same encoding as self" do + "cruel world".encode("US-ASCII").scan(/\w+/).each do |s| + s.encoding.should == Encoding::US_ASCII + end + end end describe "String#scan with pattern and block" do @@ -97,11 +103,11 @@ describe "String#scan with pattern and block" do offsets = [] str.scan(/([aeiou])/) do - md = $~ - md.string.should == str - matches << md.to_a - offsets << md.offset(0) - str + md = $~ + md.string.should == str + matches << md.to_a + offsets << md.offset(0) + str end matches.should == [["e", "e"], ["o", "o"]] @@ -111,11 +117,11 @@ describe "String#scan with pattern and block" do offsets = [] str.scan("l") do - md = $~ - md.string.should == str - matches << md.to_a - offsets << md.offset(0) - str + md = $~ + md.string.should == str + matches << md.to_a + offsets << md.offset(0) + str end matches.should == [["l"], ["l"]] @@ -159,11 +165,9 @@ describe "String#scan with pattern and block" do end end - ruby_version_is '3.0' do - it "yields String instances for subclasses" do - a = [] - StringSpecs::MyString.new("abc").scan(/./) { |s| a << s.class } - a.should == [String, String, String] - end + it "yields String instances for subclasses" do + a = [] + StringSpecs::MyString.new("abc").scan(/./) { |s| a << s.class } + a.should == [String, String, String] end end diff --git a/spec/ruby/core/string/scrub_spec.rb b/spec/ruby/core/string/scrub_spec.rb index 66755bcc7b..b9ef0f1a16 100644 --- a/spec/ruby/core/string/scrub_spec.rb +++ b/spec/ruby/core/string/scrub_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -31,18 +32,15 @@ describe "String#scrub with a default replacement" do input.scrub.should == "abc?????" end - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new("foo").scrub.should be_an_instance_of(String) - input = [0x81].pack('C').force_encoding('utf-8') - StringSpecs::MyString.new(input).scrub.should be_an_instance_of(String) - end + it "returns a String in the same encoding as self" do + x81 = [0x81].pack('C').force_encoding('utf-8') + "abc\u3042#{x81}".scrub.encoding.should == Encoding::UTF_8 end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - StringSpecs::MyString.new("foo").scrub.should be_an_instance_of(StringSpecs::MyString) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("foo").scrub.should be_an_instance_of(String) + input = [0x81].pack('C').force_encoding('utf-8') + StringSpecs::MyString.new(input).scrub.should be_an_instance_of(String) end end @@ -80,6 +78,11 @@ describe "String#scrub with a custom replacement" do block.should raise_error(ArgumentError) end + it "returns a String in the same encoding as self" do + x81 = [0x81].pack('C').force_encoding('utf-8') + "abc\u3042#{x81}".scrub("*").encoding.should == Encoding::UTF_8 + end + it "raises TypeError when a non String replacement is given" do x81 = [0x81].pack('C').force_encoding('utf-8') block = -> { "foo#{x81}".scrub(1) } @@ -87,12 +90,10 @@ describe "String#scrub with a custom replacement" do block.should raise_error(TypeError) end - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new("foo").scrub("*").should be_an_instance_of(String) - input = [0x81].pack('C').force_encoding('utf-8') - StringSpecs::MyString.new(input).scrub("*").should be_an_instance_of(String) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("foo").scrub("*").should be_an_instance_of(String) + input = [0x81].pack('C').force_encoding('utf-8') + StringSpecs::MyString.new(input).scrub("*").should be_an_instance_of(String) end end @@ -119,12 +120,10 @@ describe "String#scrub with a block" do replaced.should == "€€" end - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new("foo").scrub { |b| "*" }.should be_an_instance_of(String) - input = [0x81].pack('C').force_encoding('utf-8') - StringSpecs::MyString.new(input).scrub { |b| "<#{b.unpack("H*")[0]}>" }.should be_an_instance_of(String) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("foo").scrub { |b| "*" }.should be_an_instance_of(String) + input = [0x81].pack('C').force_encoding('utf-8') + StringSpecs::MyString.new(input).scrub { |b| "<#{b.unpack("H*")[0]}>" }.should be_an_instance_of(String) end end diff --git a/spec/ruby/core/string/setbyte_spec.rb b/spec/ruby/core/string/setbyte_spec.rb index 77bff64038..85403ca62c 100644 --- a/spec/ruby/core/string/setbyte_spec.rb +++ b/spec/ruby/core/string/setbyte_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' describe "String#setbyte" do diff --git a/spec/ruby/core/string/shared/byte_index_common.rb b/spec/ruby/core/string/shared/byte_index_common.rb new file mode 100644 index 0000000000..3de1453f4f --- /dev/null +++ b/spec/ruby/core/string/shared/byte_index_common.rb @@ -0,0 +1,63 @@ +# -*- encoding: utf-8 -*- +require_relative '../../../spec_helper' + +describe :byte_index_common, shared: true do + describe "raises on type errors" do + it "raises a TypeError if passed nil" do + -> { "abc".send(@method, nil) }.should raise_error(TypeError, "no implicit conversion of nil into String") + end + + it "raises a TypeError if passed a boolean" do + -> { "abc".send(@method, true) }.should raise_error(TypeError, "no implicit conversion of true into String") + end + + it "raises a TypeError if passed a Symbol" do + not_supported_on :opal do + -> { "abc".send(@method, :a) }.should raise_error(TypeError, "no implicit conversion of Symbol into String") + end + end + + it "raises a TypeError if passed a Symbol" do + obj = mock('x') + obj.should_not_receive(:to_int) + -> { "hello".send(@method, obj) }.should raise_error(TypeError, "no implicit conversion of MockObject into String") + end + + it "raises a TypeError if passed an Integer" do + -> { "abc".send(@method, 97) }.should raise_error(TypeError, "no implicit conversion of Integer into String") + end + end + + describe "with multibyte codepoints" do + it "raises an IndexError when byte offset lands in the middle of a multibyte character" do + -> { "わ".send(@method, "", 1) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + -> { "わ".send(@method, "", 2) }.should raise_error(IndexError, "offset 2 does not land on character boundary") + -> { "わ".send(@method, "", -1) }.should raise_error(IndexError, "offset 2 does not land on character boundary") + -> { "わ".send(@method, "", -2) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + end + + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + re = Regexp.new "れ".encode(Encoding::EUC_JP) + -> do + "あれ".send(@method, re) + end.should raise_error(Encoding::CompatibilityError, "incompatible encoding regexp match (EUC-JP regexp with UTF-8 string)") + end + end + + describe "with global variables" do + it "doesn't set $~ for non regex search" do + $~ = nil + + 'hello.'.send(@method, 'll') + $~.should == nil + end + + it "sets $~ to MatchData of match and nil when there's none" do + 'hello.'.send(@method, /.e./) + $~[0].should == 'hel' + + 'hello.'.send(@method, /not/) + $~.should == nil + end + end +end diff --git a/spec/ruby/core/string/shared/chars.rb b/spec/ruby/core/string/shared/chars.rb index e9fdf89fd6..c730643cf4 100644 --- a/spec/ruby/core/string/shared/chars.rb +++ b/spec/ruby/core/string/shared/chars.rb @@ -21,12 +21,12 @@ describe :string_chars, shared: true do end it "returns characters in the same encoding as self" do - "&%".force_encoding('Shift_JIS').send(@method).to_a.all? {|c| c.encoding.name.should == 'Shift_JIS'} + "&%".dup.force_encoding('Shift_JIS').send(@method).to_a.all? {|c| c.encoding.name.should == 'Shift_JIS'} "&%".encode('BINARY').send(@method).to_a.all? {|c| c.encoding.should == Encoding::BINARY } end it "works with multibyte characters" do - s = "\u{8987}".force_encoding("UTF-8") + s = "\u{8987}".dup.force_encoding("UTF-8") s.bytesize.should == 3 s.send(@method).to_a.should == [s] end @@ -39,14 +39,14 @@ describe :string_chars, shared: true do end it "returns a different character if the String is transcoded" do - s = "\u{20AC}".force_encoding('UTF-8') - s.encode('UTF-8').send(@method).to_a.should == ["\u{20AC}".force_encoding('UTF-8')] + s = "\u{20AC}".dup.force_encoding('UTF-8') + s.encode('UTF-8').send(@method).to_a.should == ["\u{20AC}".dup.force_encoding('UTF-8')] s.encode('iso-8859-15').send(@method).to_a.should == [[0xA4].pack('C').force_encoding('iso-8859-15')] - s.encode('iso-8859-15').encode('UTF-8').send(@method).to_a.should == ["\u{20AC}".force_encoding('UTF-8')] + s.encode('iso-8859-15').encode('UTF-8').send(@method).to_a.should == ["\u{20AC}".dup.force_encoding('UTF-8')] end it "uses the String's encoding to determine what characters it contains" do - s = "\u{24B62}" + s = +"\u{24B62}" s.force_encoding('UTF-8').send(@method).to_a.should == [ s.force_encoding('UTF-8') diff --git a/spec/ruby/core/string/shared/codepoints.rb b/spec/ruby/core/string/shared/codepoints.rb index 0b2e078e0a..1c28ba3d5e 100644 --- a/spec/ruby/core/string/shared/codepoints.rb +++ b/spec/ruby/core/string/shared/codepoints.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary describe :string_codepoints, shared: true do it "returns self" do s = "foo" @@ -7,7 +7,7 @@ describe :string_codepoints, shared: true do end it "raises an ArgumentError when self has an invalid encoding and a method is called on the returned Enumerator" do - s = "\xDF".force_encoding(Encoding::UTF_8) + s = "\xDF".dup.force_encoding(Encoding::UTF_8) s.valid_encoding?.should be_false -> { s.send(@method).to_a }.should raise_error(ArgumentError) end @@ -21,7 +21,7 @@ describe :string_codepoints, shared: true do end it "raises an ArgumentError if self's encoding is invalid and a block is given" do - s = "\xDF".force_encoding(Encoding::UTF_8) + s = "\xDF".dup.force_encoding(Encoding::UTF_8) s.valid_encoding?.should be_false -> { s.send(@method) { } }.should raise_error(ArgumentError) end @@ -49,7 +49,7 @@ describe :string_codepoints, shared: true do it "round-trips to the original String using Integer#chr" do s = "\u{13}\u{7711}\u{1010}" - s2 = "" + s2 = +"" s.send(@method) {|n| s2 << n.chr(Encoding::UTF_8)} s.should == s2 end diff --git a/spec/ruby/core/string/shared/concat.rb b/spec/ruby/core/string/shared/concat.rb index 54ac1035d3..dded9a69e7 100644 --- a/spec/ruby/core/string/shared/concat.rb +++ b/spec/ruby/core/string/shared/concat.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: false describe :string_concat, shared: true do it "concatenates the given argument to self and returns self" do str = 'hello ' @@ -5,18 +6,6 @@ describe :string_concat, shared: true do str.should == "hello world" end - it "converts the given argument to a String using to_str" do - obj = mock('world!') - obj.should_receive(:to_str).and_return("world!") - a = 'hello '.send(@method, obj) - a.should == 'hello world!' - end - - it "raises a TypeError if the given argument can't be converted to a String" do - -> { 'hello '.send(@method, []) }.should raise_error(TypeError) - -> { 'hello '.send(@method, mock('x')) }.should raise_error(TypeError) - end - it "raises a FrozenError when self is frozen" do a = "hello" a.freeze @@ -148,3 +137,23 @@ describe :string_concat_encoding, shared: true do end end end + +describe :string_concat_type_coercion, shared: true do + it "converts the given argument to a String using to_str" do + obj = mock('world!') + obj.should_receive(:to_str).and_return("world!") + a = 'hello '.send(@method, obj) + a.should == 'hello world!' + end + + it "raises a TypeError if the given argument can't be converted to a String" do + -> { 'hello '.send(@method, []) }.should raise_error(TypeError) + -> { 'hello '.send(@method, mock('x')) }.should raise_error(TypeError) + end + + it "raises a NoMethodError if the given argument raises a NoMethodError during type coercion to a String" do + obj = mock('world!') + obj.should_receive(:to_str).and_raise(NoMethodError) + -> { 'hello '.send(@method, obj) }.should raise_error(NoMethodError) + end +end diff --git a/spec/ruby/core/string/shared/dedup.rb b/spec/ruby/core/string/shared/dedup.rb index 6ffcb9b045..1ffd6aa0fd 100644 --- a/spec/ruby/core/string/shared/dedup.rb +++ b/spec/ruby/core/string/shared/dedup.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: false describe :string_dedup, shared: true do it 'returns self if the String is frozen' do input = 'foo'.freeze @@ -47,11 +48,4 @@ describe :string_dedup, shared: true do dynamic.send(@method).should_not equal("this string is frozen".send(@method).freeze) dynamic.send(@method).should equal(dynamic) end - - ruby_version_is "3.0" do - it "interns the provided string if it is frozen" do - dynamic = "this string is unique and frozen #{rand}".freeze - dynamic.send(@method).should equal(dynamic) - end - end end diff --git a/spec/ruby/core/string/shared/each_codepoint_without_block.rb b/spec/ruby/core/string/shared/each_codepoint_without_block.rb index 92b7f76032..c88e5c54c7 100644 --- a/spec/ruby/core/string/shared/each_codepoint_without_block.rb +++ b/spec/ruby/core/string/shared/each_codepoint_without_block.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary describe :string_each_codepoint_without_block, shared: true do describe "when no block is given" do it "returns an Enumerator" do @@ -6,7 +6,7 @@ describe :string_each_codepoint_without_block, shared: true do end it "returns an Enumerator even when self has an invalid encoding" do - s = "\xDF".force_encoding(Encoding::UTF_8) + s = "\xDF".dup.force_encoding(Encoding::UTF_8) s.valid_encoding?.should be_false s.send(@method).should be_an_instance_of(Enumerator) end @@ -23,7 +23,7 @@ describe :string_each_codepoint_without_block, shared: true do end it "should return the size of the string even when the string has an invalid encoding" do - s = "\xDF".force_encoding(Encoding::UTF_8) + s = "\xDF".dup.force_encoding(Encoding::UTF_8) s.valid_encoding?.should be_false s.send(@method).size.should == 1 end diff --git a/spec/ruby/core/string/shared/each_line.rb b/spec/ruby/core/string/shared/each_line.rb index bfedf8f35a..231a6d9d4f 100644 --- a/spec/ruby/core/string/shared/each_line.rb +++ b/spec/ruby/core/string/shared/each_line.rb @@ -85,20 +85,10 @@ describe :string_each_line, shared: true do end end - ruby_version_is ''...'3.0' do - it "yields subclass instances for subclasses" do - a = [] - StringSpecs::MyString.new("hello\nworld").send(@method) { |s| a << s.class } - a.should == [StringSpecs::MyString, StringSpecs::MyString] - end - end - - ruby_version_is '3.0' do - it "yields String instances for subclasses" do - a = [] - StringSpecs::MyString.new("hello\nworld").send(@method) { |s| a << s.class } - a.should == [String, String] - end + it "yields String instances for subclasses" do + a = [] + StringSpecs::MyString.new("hello\nworld").send(@method) { |s| a << s.class } + a.should == [String, String] end it "returns self" do @@ -116,12 +106,18 @@ describe :string_each_line, shared: true do end it "does not care if the string is modified while substituting" do - str = "hello\nworld." + str = +"hello\nworld." out = [] str.send(@method){|x| out << x; str[-1] = '!' }.should == "hello\nworld!" out.should == ["hello\n", "world."] end + it "returns Strings in the same encoding as self" do + "one\ntwo\r\nthree".encode("US-ASCII").send(@method) do |s| + s.encoding.should == Encoding::US_ASCII + end + end + it "raises a TypeError when the separator can't be converted to a string" do -> { "hello world".send(@method, false) {} }.should raise_error(TypeError) -> { "hello world".send(@method, mock('x')) {} }.should raise_error(TypeError) diff --git a/spec/ruby/core/string/shared/encode.rb b/spec/ruby/core/string/shared/encode.rb index a73de5b943..9466308886 100644 --- a/spec/ruby/core/string/shared/encode.rb +++ b/spec/ruby/core/string/shared/encode.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false describe :string_encode, shared: true do describe "when passed no options" do it "transcodes to Encoding.default_internal when set" do @@ -193,6 +194,190 @@ describe :string_encode, shared: true do end end + describe "given the fallback option" do + context "given a hash" do + it "looks up the replacement value from the hash" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: { "\ufffd" => "bar" }) + encoded.should == "Bbar" + end + + it "calls to_str on the returned value" do + obj = Object.new + obj.should_receive(:to_str).and_return("bar") + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: { "\ufffd" => obj }) + encoded.should == "Bbar" + end + + it "does not call to_s on the returned value" do + obj = Object.new + obj.should_not_receive(:to_s) + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: { "\ufffd" => obj }) + }.should raise_error(TypeError, "no implicit conversion of Object into String") + end + + it "raises an error if the key is not present in the hash" do + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: { "foo" => "bar" }) + }.should raise_error(Encoding::UndefinedConversionError, "U+FFFD from UTF-8 to US-ASCII") + end + + it "raises an error if the value is itself invalid" do + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: { "\ufffd" => "\uffee" }) + }.should raise_error(ArgumentError, "too big fallback string") + end + + it "uses the hash's default value if set" do + hash = {} + hash.default = "bar" + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: hash) + encoded.should == "Bbar" + end + + it "uses the result of calling default_proc if set" do + hash = {} + hash.default_proc = -> _, _ { "bar" } + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: hash) + encoded.should == "Bbar" + end + end + + context "given an object inheriting from Hash" do + before do + klass = Class.new(Hash) + @hash_like = klass.new + @hash_like["\ufffd"] = "bar" + end + + it "looks up the replacement value from the object" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: @hash_like) + encoded.should == "Bbar" + end + end + + context "given an object responding to []" do + before do + klass = Class.new do + def [](c) = c.bytes.inspect + end + @hash_like = klass.new + end + + it "calls [] on the object, passing the invalid character" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: @hash_like) + encoded.should == "B[239, 191, 189]" + end + end + + context "given an object not responding to []" do + before do + @non_hash_like = Object.new + end + + it "raises an error" do + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: @non_hash_like) + }.should raise_error(Encoding::UndefinedConversionError, "U+FFFD from UTF-8 to US-ASCII") + end + end + + context "given a proc" do + it "calls the proc to get the replacement value, passing in the invalid character" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: proc { |c| c.bytes.inspect }) + encoded.should == "B[239, 191, 189]" + end + + it "calls to_str on the returned value" do + obj = Object.new + obj.should_receive(:to_str).and_return("bar") + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: proc { |c| obj }) + encoded.should == "Bbar" + end + + it "does not call to_s on the returned value" do + obj = Object.new + obj.should_not_receive(:to_s) + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: proc { |c| obj }) + }.should raise_error(TypeError, "no implicit conversion of Object into String") + end + + it "raises an error if the returned value is itself invalid" do + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: -> c { "\uffee" }) + }.should raise_error(ArgumentError, "too big fallback string") + end + end + + context "given a lambda" do + it "calls the lambda to get the replacement value, passing in the invalid character" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: -> c { c.bytes.inspect }) + encoded.should == "B[239, 191, 189]" + end + + it "calls to_str on the returned value" do + obj = Object.new + obj.should_receive(:to_str).and_return("bar") + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: -> c { obj }) + encoded.should == "Bbar" + end + + it "does not call to_s on the returned value" do + obj = Object.new + obj.should_not_receive(:to_s) + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: -> c { obj }) + }.should raise_error(TypeError, "no implicit conversion of Object into String") + end + + it "raises an error if the returned value is itself invalid" do + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: -> c { "\uffee" }) + }.should raise_error(ArgumentError, "too big fallback string") + end + end + + context "given a method" do + def replace(c) = c.bytes.inspect + def replace_bad(c) = "\uffee" + + def replace_to_str(c) + obj = Object.new + obj.should_receive(:to_str).and_return("bar") + obj + end + + def replace_to_s(c) + obj = Object.new + obj.should_not_receive(:to_s) + obj + end + + it "calls the method to get the replacement value, passing in the invalid character" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: method(:replace)) + encoded.should == "B[239, 191, 189]" + end + + it "calls to_str on the returned value" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: method(:replace_to_str)) + encoded.should == "Bbar" + end + + it "does not call to_s on the returned value" do + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: method(:replace_to_s)) + }.should raise_error(TypeError, "no implicit conversion of Object into String") + end + + it "raises an error if the returned value is itself invalid" do + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: method(:replace_bad)) + }.should raise_error(ArgumentError, "too big fallback string") + end + end + end + describe "given the xml: :text option" do it "replaces all instances of '&' with '&'" do '& and &'.send(@method, "UTF-8", xml: :text).should == '& and &' diff --git a/spec/ruby/core/string/shared/eql.rb b/spec/ruby/core/string/shared/eql.rb index 6f268c929c..d5af337d53 100644 --- a/spec/ruby/core/string/shared/eql.rb +++ b/spec/ruby/core/string/shared/eql.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../../spec_helper' require_relative '../fixtures/classes' @@ -13,15 +13,15 @@ describe :string_eql_value, shared: true do end it "ignores encoding difference of compatible string" do - "hello".force_encoding("utf-8").send(@method, "hello".force_encoding("iso-8859-1")).should be_true + "hello".dup.force_encoding("utf-8").send(@method, "hello".dup.force_encoding("iso-8859-1")).should be_true end it "considers encoding difference of incompatible string" do - "\xff".force_encoding("utf-8").send(@method, "\xff".force_encoding("iso-8859-1")).should be_false + "\xff".dup.force_encoding("utf-8").send(@method, "\xff".dup.force_encoding("iso-8859-1")).should be_false end it "considers encoding compatibility" do - "abcd".force_encoding("utf-8").send(@method, "abcd".force_encoding("utf-32le")).should be_false + "abcd".dup.force_encoding("utf-8").send(@method, "abcd".dup.force_encoding("utf-32le")).should be_false end it "ignores subclass differences" do @@ -33,6 +33,6 @@ describe :string_eql_value, shared: true do end it "returns true when comparing 2 empty strings but one is not ASCII-compatible" do - "".send(@method, "".force_encoding('iso-2022-jp')).should == true + "".send(@method, "".dup.force_encoding('iso-2022-jp')).should == true end end diff --git a/spec/ruby/core/string/shared/length.rb b/spec/ruby/core/string/shared/length.rb index 94e5ec135b..ae572ba755 100644 --- a/spec/ruby/core/string/shared/length.rb +++ b/spec/ruby/core/string/shared/length.rb @@ -18,7 +18,7 @@ describe :string_length, shared: true do end it "returns the length of the new self after encoding is changed" do - str = 'こにちわ' + str = +'こにちわ' str.send(@method) str.force_encoding('BINARY').send(@method).should == 12 @@ -44,12 +44,12 @@ describe :string_length, shared: true do end it "adds 1 (and not 2) for a incomplete surrogate in UTF-16" do - "\x00\xd8".force_encoding("UTF-16LE").send(@method).should == 1 - "\xd8\x00".force_encoding("UTF-16BE").send(@method).should == 1 + "\x00\xd8".dup.force_encoding("UTF-16LE").send(@method).should == 1 + "\xd8\x00".dup.force_encoding("UTF-16BE").send(@method).should == 1 end it "adds 1 for a broken sequence in UTF-32" do - "\x04\x03\x02\x01".force_encoding("UTF-32LE").send(@method).should == 1 - "\x01\x02\x03\x04".force_encoding("UTF-32BE").send(@method).should == 1 + "\x04\x03\x02\x01".dup.force_encoding("UTF-32LE").send(@method).should == 1 + "\x01\x02\x03\x04".dup.force_encoding("UTF-32BE").send(@method).should == 1 end end diff --git a/spec/ruby/core/string/shared/partition.rb b/spec/ruby/core/string/shared/partition.rb index 7dc3d9cc0b..4cac149ce5 100644 --- a/spec/ruby/core/string/shared/partition.rb +++ b/spec/ruby/core/string/shared/partition.rb @@ -2,35 +2,32 @@ require_relative '../../../spec_helper' require_relative '../fixtures/classes' describe :string_partition, shared: true do - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new("hello").send(@method, "l").each do |item| - item.should be_an_instance_of(String) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("hello").send(@method, "l").each do |item| + item.should be_an_instance_of(String) + end - StringSpecs::MyString.new("hello").send(@method, "x").each do |item| - item.should be_an_instance_of(String) - end + StringSpecs::MyString.new("hello").send(@method, "x").each do |item| + item.should be_an_instance_of(String) + end - StringSpecs::MyString.new("hello").send(@method, /l./).each do |item| - item.should be_an_instance_of(String) - end + StringSpecs::MyString.new("hello").send(@method, /l./).each do |item| + item.should be_an_instance_of(String) end end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - StringSpecs::MyString.new("hello").send(@method, StringSpecs::MyString.new("l")).each do |item| - item.should be_an_instance_of(StringSpecs::MyString) - end + it "returns before- and after- parts in the same encoding as self" do + strings = "hello".encode("US-ASCII").send(@method, "ello") + strings[0].encoding.should == Encoding::US_ASCII + strings[2].encoding.should == Encoding::US_ASCII - StringSpecs::MyString.new("hello").send(@method, "x").each do |item| - item.should be_an_instance_of(StringSpecs::MyString) - end + strings = "hello".encode("US-ASCII").send(@method, /ello/) + strings[0].encoding.should == Encoding::US_ASCII + strings[2].encoding.should == Encoding::US_ASCII + end - StringSpecs::MyString.new("hello").send(@method, /l./).each do |item| - item.should be_an_instance_of(StringSpecs::MyString) - end - end + it "returns the matching part in the separator's encoding" do + strings = "hello".encode("US-ASCII").send(@method, "ello") + strings[1].encoding.should == Encoding::UTF_8 end end diff --git a/spec/ruby/core/string/shared/replace.rb b/spec/ruby/core/string/shared/replace.rb index a5108d9e7c..24dac0eb27 100644 --- a/spec/ruby/core/string/shared/replace.rb +++ b/spec/ruby/core/string/shared/replace.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: false describe :string_replace, shared: true do it "returns self" do a = "a" diff --git a/spec/ruby/core/string/shared/slice.rb b/spec/ruby/core/string/shared/slice.rb index 713234fffd..7b9b9f6a14 100644 --- a/spec/ruby/core/string/shared/slice.rb +++ b/spec/ruby/core/string/shared/slice.rb @@ -80,12 +80,12 @@ describe :string_slice_index_length, shared: true do "hello there".send(@method, -3,2).should == "er" end - it "returns a string with the same encoding" do + it "returns a string with the same encoding as self" do s = "hello there" s.send(@method, 1, 9).encoding.should == s.encoding - a = "hello".force_encoding("binary") - b = " there".force_encoding("ISO-8859-1") + a = "hello".dup.force_encoding("binary") + b = " there".dup.force_encoding("ISO-8859-1") c = (a + b).force_encoding(Encoding::US_ASCII) c.send(@method, 0, 5).encoding.should == Encoding::US_ASCII @@ -119,6 +119,18 @@ describe :string_slice_index_length, shared: true do "hello there".send(@method, -4,-3).should == nil end + platform_is pointer_size: 64 do + it "returns nil if the length is negative big value" do + "hello there".send(@method, 4, -(1 << 31)).should == nil + + # by some reason length < -(1 << 31) on CI on Windows leads to + # 'RangeError: bignum too big to convert into `long'' error + platform_is_not :windows do + "hello there".send(@method, 4, -(1 << 63)).should == nil + end + end + end + it "calls to_int on the given index and the given length" do "hello".send(@method, 0.5, 1).should == "h" "hello".send(@method, 0.5, 2.5).should == "he" @@ -152,22 +164,16 @@ describe :string_slice_index_length, shared: true do -> { "hello".send(@method, 0, bignum_value) }.should raise_error(RangeError) end - ruby_version_is ''...'3.0' do - it "returns subclass instances" do - s = StringSpecs::MyString.new("hello") - s.send(@method, 0,0).should be_an_instance_of(StringSpecs::MyString) - s.send(@method, 0,4).should be_an_instance_of(StringSpecs::MyString) - s.send(@method, 1,4).should be_an_instance_of(StringSpecs::MyString) - end + it "raises a RangeError if the index or length is too small" do + -> { "hello".send(@method, -bignum_value, 1) }.should raise_error(RangeError) + -> { "hello".send(@method, 0, -bignum_value) }.should raise_error(RangeError) end - ruby_version_is '3.0' do - it "returns String instances" do - s = StringSpecs::MyString.new("hello") - s.send(@method, 0,0).should be_an_instance_of(String) - s.send(@method, 0,4).should be_an_instance_of(String) - s.send(@method, 1,4).should be_an_instance_of(String) - end + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.send(@method, 0,0).should be_an_instance_of(String) + s.send(@method, 0,4).should be_an_instance_of(String) + s.send(@method, 1,4).should be_an_instance_of(String) end it "handles repeated application" do @@ -206,6 +212,10 @@ describe :string_slice_range, shared: true do "x".send(@method, 1..-1).should == "" end + it "returns a String in the same encoding as self" do + "hello there".encode("US-ASCII").send(@method, 1..1).encoding.should == Encoding::US_ASCII + end + it "returns nil if the beginning of the range falls outside of self" do "hello there".send(@method, 12..-1).should == nil "hello there".send(@method, 20..25).should == nil @@ -238,22 +248,11 @@ describe :string_slice_range, shared: true do "x".send(@method, 1...-1).should == "" end - ruby_version_is ''...'3.0' do - it "returns subclass instances" do - s = StringSpecs::MyString.new("hello") - s.send(@method, 0...0).should be_an_instance_of(StringSpecs::MyString) - s.send(@method, 0..4).should be_an_instance_of(StringSpecs::MyString) - s.send(@method, 1..4).should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns String instances" do - s = StringSpecs::MyString.new("hello") - s.send(@method, 0...0).should be_an_instance_of(String) - s.send(@method, 0..4).should be_an_instance_of(String) - s.send(@method, 1..4).should be_an_instance_of(String) - end + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.send(@method, 0...0).should be_an_instance_of(String) + s.send(@method, 0..4).should be_an_instance_of(String) + s.send(@method, 1..4).should be_an_instance_of(String) end it "calls to_int on range arguments" do @@ -328,23 +327,14 @@ describe :string_slice_regexp, shared: true do "hello there".send(@method, /xyz/).should == nil end - not_supported_on :opal do - end - - ruby_version_is ''...'3.0' do - it "returns subclass instances" do - s = StringSpecs::MyString.new("hello") - s.send(@method, //).should be_an_instance_of(StringSpecs::MyString) - s.send(@method, /../).should be_an_instance_of(StringSpecs::MyString) - end + it "returns a String in the same encoding as self" do + "hello there".encode("US-ASCII").send(@method, /[aeiou](.)\1/).encoding.should == Encoding::US_ASCII end - ruby_version_is '3.0' do - it "returns String instances" do - s = StringSpecs::MyString.new("hello") - s.send(@method, //).should be_an_instance_of(String) - s.send(@method, /../).should be_an_instance_of(String) - end + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.send(@method, //).should be_an_instance_of(String) + s.send(@method, /../).should be_an_instance_of(String) end it "sets $~ to MatchData when there is a match and nil when there's none" do @@ -391,6 +381,10 @@ describe :string_slice_regexp_index, shared: true do $~[1].should == nil end + it "returns a String in the same encoding as self" do + "hello there".encode("US-ASCII").send(@method, /[aeiou](.)\1/, 0).encoding.should == Encoding::US_ASCII + end + it "calls to_int on the given index" do obj = mock('2') obj.should_receive(:to_int).and_return(2) @@ -409,20 +403,10 @@ describe :string_slice_regexp_index, shared: true do -> { "hello".send(@method, /(.)(.)(.)/, nil) }.should raise_error(TypeError) end - ruby_version_is ''...'3.0' do - it "returns subclass instances" do - s = StringSpecs::MyString.new("hello") - s.send(@method, /(.)(.)/, 0).should be_an_instance_of(StringSpecs::MyString) - s.send(@method, /(.)(.)/, 1).should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns String instances" do - s = StringSpecs::MyString.new("hello") - s.send(@method, /(.)(.)/, 0).should be_an_instance_of(String) - s.send(@method, /(.)(.)/, 1).should be_an_instance_of(String) - end + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.send(@method, /(.)(.)/, 0).should be_an_instance_of(String) + s.send(@method, /(.)(.)/, 1).should be_an_instance_of(String) end it "sets $~ to MatchData when there is a match and nil when there's none" do @@ -461,22 +445,11 @@ describe :string_slice_string, shared: true do -> { "hello".send(@method, o) }.should raise_error(TypeError) end - ruby_version_is ''...'3.0' do - it "returns a subclass instance when given a subclass instance" do - s = StringSpecs::MyString.new("el") - r = "hello".send(@method, s) - r.should == "el" - r.should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns a String instance when given a subclass instance" do - s = StringSpecs::MyString.new("el") - r = "hello".send(@method, s) - r.should == "el" - r.should be_an_instance_of(String) - end + it "returns a String instance when given a subclass instance" do + s = StringSpecs::MyString.new("el") + r = "hello".send(@method, s) + r.should == "el" + r.should be_an_instance_of(String) end end @@ -522,18 +495,9 @@ describe :string_slice_regexp_group, shared: true do -> { "hello".send(@method, /(?<q>)/, '') }.should raise_error(IndexError) end - ruby_version_is ''...'3.0' do - it "returns subclass instances" do - s = StringSpecs::MyString.new("hello") - s.send(@method, /(?<q>.)/, 'q').should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns String instances" do - s = StringSpecs::MyString.new("hello") - s.send(@method, /(?<q>.)/, 'q').should be_an_instance_of(String) - end + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.send(@method, /(?<q>.)/, 'q').should be_an_instance_of(String) end it "sets $~ to MatchData when there is a match and nil when there's none" do diff --git a/spec/ruby/core/string/shared/strip.rb b/spec/ruby/core/string/shared/strip.rb index 9c232b4694..3af77b50fe 100644 --- a/spec/ruby/core/string/shared/strip.rb +++ b/spec/ruby/core/string/shared/strip.rb @@ -2,19 +2,13 @@ require_relative '../../../spec_helper' require_relative '../fixtures/classes' describe :string_strip, shared: true do - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new(" hello ").send(@method).should be_an_instance_of(String) - StringSpecs::MyString.new(" ").send(@method).should be_an_instance_of(String) - StringSpecs::MyString.new("").send(@method).should be_an_instance_of(String) - end + it "returns a String in the same encoding as self" do + " hello ".encode("US-ASCII").send(@method).encoding.should == Encoding::US_ASCII end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - StringSpecs::MyString.new(" hello ").send(@method).should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new(" ").send(@method).should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("").send(@method).should be_an_instance_of(StringSpecs::MyString) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new(" hello ").send(@method).should be_an_instance_of(String) + StringSpecs::MyString.new(" ").send(@method).should be_an_instance_of(String) + StringSpecs::MyString.new("").send(@method).should be_an_instance_of(String) end end diff --git a/spec/ruby/core/string/shared/succ.rb b/spec/ruby/core/string/shared/succ.rb index 66edf6dc82..7c68345f10 100644 --- a/spec/ruby/core/string/shared/succ.rb +++ b/spec/ruby/core/string/shared/succ.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary describe :string_succ, shared: true do it "returns an empty string for empty strings" do "".send(@method).should == "" @@ -59,26 +59,21 @@ describe :string_succ, shared: true do "\xFF\xFF".send(@method).should == "\x01\x00\x00" end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - StringSpecs::MyString.new("").send(@method).should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("a").send(@method).should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("z").send(@method).should be_an_instance_of(StringSpecs::MyString) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("").send(@method).should be_an_instance_of(String) + StringSpecs::MyString.new("a").send(@method).should be_an_instance_of(String) + StringSpecs::MyString.new("z").send(@method).should be_an_instance_of(String) end - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new("").send(@method).should be_an_instance_of(String) - StringSpecs::MyString.new("a").send(@method).should be_an_instance_of(String) - StringSpecs::MyString.new("z").send(@method).should be_an_instance_of(String) - end + it "returns a String in the same encoding as self" do + "z".encode("US-ASCII").send(@method).encoding.should == Encoding::US_ASCII end end describe :string_succ_bang, shared: true do it "is equivalent to succ, but modifies self in place (still returns self)" do ["", "abcd", "THX1138"].each do |s| + s = +s r = s.dup.send(@method) s.send(@method).should equal(s) s.should == r diff --git a/spec/ruby/core/string/shared/to_a.rb b/spec/ruby/core/string/shared/to_a.rb deleted file mode 100644 index bad3ea6584..0000000000 --- a/spec/ruby/core/string/shared/to_a.rb +++ /dev/null @@ -1,9 +0,0 @@ -describe :string_to_a, shared: true do - it "returns an empty array for empty strings" do - "".send(@method).should == [] - end - - it "returns an array containing the string for non-empty strings" do - "hello".send(@method).should == ["hello"] - end -end diff --git a/spec/ruby/core/string/shared/to_sym.rb b/spec/ruby/core/string/shared/to_sym.rb index ef7c22bb6a..833eae100e 100644 --- a/spec/ruby/core/string/shared/to_sym.rb +++ b/spec/ruby/core/string/shared/to_sym.rb @@ -56,9 +56,9 @@ describe :string_to_sym, shared: true do it "ignores existing symbols with different encoding" do source = "fée" - iso_symbol = source.force_encoding(Encoding::ISO_8859_1).send(@method) + iso_symbol = source.dup.force_encoding(Encoding::ISO_8859_1).send(@method) iso_symbol.encoding.should == Encoding::ISO_8859_1 - binary_symbol = source.force_encoding(Encoding::BINARY).send(@method) + binary_symbol = source.dup.force_encoding(Encoding::BINARY).send(@method) binary_symbol.encoding.should == Encoding::BINARY end @@ -67,6 +67,6 @@ describe :string_to_sym, shared: true do invalid_utf8.should_not.valid_encoding? -> { invalid_utf8.send(@method) - }.should raise_error(EncodingError, /invalid/) + }.should raise_error(EncodingError, 'invalid symbol in encoding UTF-8 :"\xC3"') end end diff --git a/spec/ruby/core/string/slice_spec.rb b/spec/ruby/core/string/slice_spec.rb index c9e13ed1bc..5aba2d3be0 100644 --- a/spec/ruby/core/string/slice_spec.rb +++ b/spec/ruby/core/string/slice_spec.rb @@ -1,5 +1,5 @@ # -*- encoding: utf-8 -*- - +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' require_relative 'shared/slice' @@ -132,20 +132,10 @@ describe "String#slice! with index, length" do "hello".slice!(obj, obj).should == "ll" end - ruby_version_is ''...'3.0' do - it "returns subclass instances" do - s = StringSpecs::MyString.new("hello") - s.slice!(0, 0).should be_an_instance_of(StringSpecs::MyString) - s.slice!(0, 4).should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns String instances" do - s = StringSpecs::MyString.new("hello") - s.slice!(0, 0).should be_an_instance_of(String) - s.slice!(0, 4).should be_an_instance_of(String) - end + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.slice!(0, 0).should be_an_instance_of(String) + s.slice!(0, 4).should be_an_instance_of(String) end it "returns the substring given by the character offsets" do @@ -185,20 +175,10 @@ describe "String#slice! Range" do b.should == "hello" end - ruby_version_is ''...'3.0' do - it "returns subclass instances" do - s = StringSpecs::MyString.new("hello") - s.slice!(0...0).should be_an_instance_of(StringSpecs::MyString) - s.slice!(0..4).should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns String instances" do - s = StringSpecs::MyString.new("hello") - s.slice!(0...0).should be_an_instance_of(String) - s.slice!(0..4).should be_an_instance_of(String) - end + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.slice!(0...0).should be_an_instance_of(String) + s.slice!(0..4).should be_an_instance_of(String) end it "calls to_int on range arguments" do @@ -274,20 +254,10 @@ describe "String#slice! with Regexp" do s.should == "this is a string" end - ruby_version_is ''...'3.0' do - it "returns subclass instances" do - s = StringSpecs::MyString.new("hello") - s.slice!(//).should be_an_instance_of(StringSpecs::MyString) - s.slice!(/../).should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns String instances" do - s = StringSpecs::MyString.new("hello") - s.slice!(//).should be_an_instance_of(String) - s.slice!(/../).should be_an_instance_of(String) - end + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.slice!(//).should be_an_instance_of(String) + s.slice!(/../).should be_an_instance_of(String) end it "returns the matching portion of self with a multi byte character" do @@ -344,20 +314,10 @@ describe "String#slice! with Regexp, index" do "har".slice!(/(.)(.)(.)/, obj).should == "a" end - ruby_version_is ''...'3.0' do - it "returns subclass instances" do - s = StringSpecs::MyString.new("hello") - s.slice!(/(.)(.)/, 0).should be_an_instance_of(StringSpecs::MyString) - s.slice!(/(.)(.)/, 1).should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns String instances" do - s = StringSpecs::MyString.new("hello") - s.slice!(/(.)(.)/, 0).should be_an_instance_of(String) - s.slice!(/(.)(.)/, 1).should be_an_instance_of(String) - end + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.slice!(/(.)(.)/, 0).should be_an_instance_of(String) + s.slice!(/(.)(.)/, 1).should be_an_instance_of(String) end it "returns the encoding aware capture for the given index" do @@ -415,22 +375,11 @@ describe "String#slice! with String" do -> { "hello".slice!(o) }.should raise_error(TypeError) end - ruby_version_is ''...'3.0' do - it "returns a subclass instance when given a subclass instance" do - s = StringSpecs::MyString.new("el") - r = "hello".slice!(s) - r.should == "el" - r.should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns a subclass instance when given a subclass instance" do - s = StringSpecs::MyString.new("el") - r = "hello".slice!(s) - r.should == "el" - r.should be_an_instance_of(String) - end + it "returns a subclass instance when given a subclass instance" do + s = StringSpecs::MyString.new("el") + r = "hello".slice!(s) + r.should == "el" + r.should be_an_instance_of(String) end it "raises a FrozenError if self is frozen" do diff --git a/spec/ruby/core/string/split_spec.rb b/spec/ruby/core/string/split_spec.rb index 0417486692..3c6d1864d1 100644 --- a/spec/ruby/core/string/split_spec.rb +++ b/spec/ruby/core/string/split_spec.rb @@ -4,7 +4,7 @@ require_relative 'fixtures/classes' describe "String#split with String" do it "throws an ArgumentError if the string is not a valid" do - s = "\xDF".force_encoding(Encoding::UTF_8) + s = "\xDF".dup.force_encoding(Encoding::UTF_8) -> { s.split }.should raise_error(ArgumentError) -> { s.split(':') }.should raise_error(ArgumentError) @@ -12,7 +12,7 @@ describe "String#split with String" do it "throws an ArgumentError if the pattern is not a valid string" do str = 'проверка' - broken_str = "\xDF".force_encoding(Encoding::UTF_8) + broken_str = "\xDF".dup.force_encoding(Encoding::UTF_8) -> { str.split(broken_str) }.should raise_error(ArgumentError) end @@ -192,44 +192,16 @@ describe "String#split with String" do "foo".split("bar", 3).should == ["foo"] end - ruby_version_is ''...'3.0' do - it "returns subclass instances based on self" do - ["", "x.y.z.", " x y "].each do |str| - ["", ".", " "].each do |pat| - [-1, 0, 1, 2].each do |limit| - StringSpecs::MyString.new(str).split(pat, limit).each do |x| - x.should be_an_instance_of(StringSpecs::MyString) - end - - str.split(StringSpecs::MyString.new(pat), limit).each do |x| - x.should be_an_instance_of(String) - end + it "returns String instances based on self" do + ["", "x.y.z.", " x y "].each do |str| + ["", ".", " "].each do |pat| + [-1, 0, 1, 2].each do |limit| + StringSpecs::MyString.new(str).split(pat, limit).each do |x| + x.should be_an_instance_of(String) end - end - end - end - - it "does not call constructor on created subclass instances" do - # can't call should_not_receive on an object that doesn't yet exist - # so failure here is signalled by exception, not expectation failure - - s = StringSpecs::StringWithRaisingConstructor.new('silly:string') - s.split(':').first.should == 'silly' - end - end - ruby_version_is '3.0' do - it "returns String instances based on self" do - ["", "x.y.z.", " x y "].each do |str| - ["", ".", " "].each do |pat| - [-1, 0, 1, 2].each do |limit| - StringSpecs::MyString.new(str).split(pat, limit).each do |x| - x.should be_an_instance_of(String) - end - - str.split(StringSpecs::MyString.new(pat), limit).each do |x| - x.should be_an_instance_of(String) - end + str.split(StringSpecs::MyString.new(pat), limit).each do |x| + x.should be_an_instance_of(String) end end end @@ -246,11 +218,18 @@ describe "String#split with String" do it "doesn't split on non-ascii whitespace" do "a\u{2008}b".split(" ").should == ["a\u{2008}b"] end + + it "returns Strings in the same encoding as self" do + strings = "hello world".encode("US-ASCII").split(" ") + + strings[0].encoding.should == Encoding::US_ASCII + strings[1].encoding.should == Encoding::US_ASCII + end end describe "String#split with Regexp" do it "throws an ArgumentError if the string is not a valid" do - s = "\xDF".force_encoding(Encoding::UTF_8) + s = "\xDF".dup.force_encoding(Encoding::UTF_8) -> { s.split(/./) }.should raise_error(ArgumentError) end @@ -407,55 +386,30 @@ describe "String#split with Regexp" do "foo".split(/bar/, 3).should == ["foo"] end - ruby_version_is ''...'3.0' do - it "returns subclass instances based on self" do - ["", "x:y:z:", " x y "].each do |str| - [//, /:/, /\s+/].each do |pat| - [-1, 0, 1, 2].each do |limit| - StringSpecs::MyString.new(str).split(pat, limit).each do |x| - x.should be_an_instance_of(StringSpecs::MyString) - end - end - end - end - end - - it "does not call constructor on created subclass instances" do - # can't call should_not_receive on an object that doesn't yet exist - # so failure here is signalled by exception, not expectation failure - - s = StringSpecs::StringWithRaisingConstructor.new('silly:string') - s.split(/:/).first.should == 'silly' - end - end - - ruby_version_is '3.0' do - it "returns String instances based on self" do - ["", "x:y:z:", " x y "].each do |str| - [//, /:/, /\s+/].each do |pat| - [-1, 0, 1, 2].each do |limit| - StringSpecs::MyString.new(str).split(pat, limit).each do |x| - x.should be_an_instance_of(String) - end + it "returns String instances based on self" do + ["", "x:y:z:", " x y "].each do |str| + [//, /:/, /\s+/].each do |pat| + [-1, 0, 1, 2].each do |limit| + StringSpecs::MyString.new(str).split(pat, limit).each do |x| + x.should be_an_instance_of(String) end end end end end - it "retains the encoding of the source string" do + it "returns Strings in the same encoding as self" do ary = "а б в".split encodings = ary.map { |s| s.encoding } encodings.should == [Encoding::UTF_8, Encoding::UTF_8, Encoding::UTF_8] end - it "splits a string on each character for a multibyte encoding and empty split" do "That's why efficiency could not be helped".split("").size.should == 39 end it "returns an ArgumentError if an invalid UTF-8 string is supplied" do - broken_str = 'проверка' # in russian, means "test" + broken_str = +'проверка' # in russian, means "test" broken_str.force_encoding('binary') broken_str.chop! broken_str.force_encoding('utf-8') @@ -563,32 +517,16 @@ describe "String#split with Regexp" do end describe "for a String subclass" do - ruby_version_is ''...'3.0' do - it "yields instances of the same subclass" do - a = [] - StringSpecs::MyString.new("a|b").split("|") { |str| a << str } - first, last = a - - first.should be_an_instance_of(StringSpecs::MyString) - first.should == "a" - - last.should be_an_instance_of(StringSpecs::MyString) - last.should == "b" - end - end - - ruby_version_is '3.0' do - it "yields instances of String" do - a = [] - StringSpecs::MyString.new("a|b").split("|") { |str| a << str } - first, last = a + it "yields instances of String" do + a = [] + StringSpecs::MyString.new("a|b").split("|") { |str| a << str } + first, last = a - first.should be_an_instance_of(String) - first.should == "a" + first.should be_an_instance_of(String) + first.should == "a" - last.should be_an_instance_of(String) - last.should == "b" - end + last.should be_an_instance_of(String) + last.should == "b" end end @@ -598,4 +536,11 @@ describe "String#split with Regexp" do -> { "hello".split(false) }.should raise_error(TypeError) -> { "hello".split(Object.new) }.should raise_error(TypeError) end + + it "returns Strings in the same encoding as self" do + strings = "hello world".encode("US-ASCII").split(/ /) + + strings[0].encoding.should == Encoding::US_ASCII + strings[1].encoding.should == Encoding::US_ASCII + end end diff --git a/spec/ruby/core/string/squeeze_spec.rb b/spec/ruby/core/string/squeeze_spec.rb index 5dc12a4247..981d480684 100644 --- a/spec/ruby/core/string/squeeze_spec.rb +++ b/spec/ruby/core/string/squeeze_spec.rb @@ -1,4 +1,5 @@ -# -*- encoding: binary -*- +# encoding: binary +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -64,22 +65,19 @@ describe "String#squeeze" do "hello room".squeeze(other_string, other_string2).should == "hello rom" end + it "returns a String in the same encoding as self" do + "yellow moon".encode("US-ASCII").squeeze.encoding.should == Encoding::US_ASCII + "yellow moon".encode("US-ASCII").squeeze("a").encoding.should == Encoding::US_ASCII + end + it "raises a TypeError when one set arg can't be converted to a string" do -> { "hello world".squeeze([]) }.should raise_error(TypeError) -> { "hello world".squeeze(Object.new)}.should raise_error(TypeError) -> { "hello world".squeeze(mock('x')) }.should raise_error(TypeError) end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - StringSpecs::MyString.new("oh no!!!").squeeze("!").should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new("oh no!!!").squeeze("!").should be_an_instance_of(String) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("oh no!!!").squeeze("!").should be_an_instance_of(String) end end diff --git a/spec/ruby/core/string/start_with_spec.rb b/spec/ruby/core/string/start_with_spec.rb index 3833289f96..35e33b46a6 100644 --- a/spec/ruby/core/string/start_with_spec.rb +++ b/spec/ruby/core/string/start_with_spec.rb @@ -7,12 +7,21 @@ describe "String#start_with?" do it_behaves_like :start_with, :to_s # Here and not in the shared examples because this is invalid as a Symbol - it "does not check that we are not starting to match at the head of a character" do + it "matches part of a character with the same part" do "\xA9".should.start_with?("\xA9") # A9 is not a character head for UTF-8 end - it "does not check we are matching only part of a character" do - "\xe3\x81\x82".size.should == 1 - "\xe3\x81\x82".should.start_with?("\xe3") + ruby_version_is ""..."3.3" do + it "does not check we are matching only part of a character" do + "\xe3\x81\x82".size.should == 1 + "\xe3\x81\x82".should.start_with?("\xe3") + end + end + + ruby_version_is "3.3" do # #19784 + it "checks we are matching only part of a character" do + "\xe3\x81\x82".size.should == 1 + "\xe3\x81\x82".should_not.start_with?("\xe3") + end end end diff --git a/spec/ruby/core/string/strip_spec.rb b/spec/ruby/core/string/strip_spec.rb index 662f13b032..edb6ea3b44 100644 --- a/spec/ruby/core/string/strip_spec.rb +++ b/spec/ruby/core/string/strip_spec.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' require_relative 'shared/strip' @@ -11,10 +12,8 @@ describe "String#strip" do "\tgoodbye\r\v\n".strip.should == "goodbye" end - ruby_version_is '3.0' do - it "returns a copy of self without leading and trailing NULL bytes and whitespace" do - " \x00 goodbye \x00 ".strip.should == "goodbye" - end + it "returns a copy of self without leading and trailing NULL bytes and whitespace" do + " \x00 goodbye \x00 ".strip.should == "goodbye" end end @@ -41,12 +40,10 @@ describe "String#strip!" do " ".strip.should == "" end - ruby_version_is '3.0' do - it "removes leading and trailing NULL bytes and whitespace" do - a = "\000 goodbye \000" - a.strip! - a.should == "goodbye" - end + it "removes leading and trailing NULL bytes and whitespace" do + a = "\000 goodbye \000" + a.strip! + a.should == "goodbye" end it "raises a FrozenError on a frozen instance that is modified" do diff --git a/spec/ruby/core/string/sub_spec.rb b/spec/ruby/core/string/sub_spec.rb index 9effe88c27..6ff28ec851 100644 --- a/spec/ruby/core/string/sub_spec.rb +++ b/spec/ruby/core/string/sub_spec.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -170,22 +171,11 @@ describe "String#sub with pattern, replacement" do -> { "hello".sub(/[aeiou]/, 99) }.should raise_error(TypeError) end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - StringSpecs::MyString.new("").sub(//, "").should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("").sub(/foo/, "").should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("foo").sub(/foo/, "").should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("foo").sub("foo", "").should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new("").sub(//, "").should be_an_instance_of(String) - StringSpecs::MyString.new("").sub(/foo/, "").should be_an_instance_of(String) - StringSpecs::MyString.new("foo").sub(/foo/, "").should be_an_instance_of(String) - StringSpecs::MyString.new("foo").sub("foo", "").should be_an_instance_of(String) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("").sub(//, "").should be_an_instance_of(String) + StringSpecs::MyString.new("").sub(/foo/, "").should be_an_instance_of(String) + StringSpecs::MyString.new("foo").sub(/foo/, "").should be_an_instance_of(String) + StringSpecs::MyString.new("foo").sub("foo", "").should be_an_instance_of(String) end it "sets $~ to MatchData of match and nil when there's none" do @@ -214,6 +204,17 @@ describe "String#sub with pattern, replacement" do "ababa".sub(/(b)/, '\\\\\1').should == "a\\baba" end + it "handles a pattern in a superset encoding" do + result = 'abc'.force_encoding(Encoding::US_ASCII).sub('é', 'è') + result.should == 'abc' + result.encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + result = 'été'.sub('t'.force_encoding(Encoding::US_ASCII), 'u') + result.should == 'éué' + result.encoding.should == Encoding::UTF_8 + end end describe "String#sub with pattern and block" do @@ -231,10 +232,10 @@ describe "String#sub with pattern and block" do offsets = [] str.sub(/([aeiou])/) do - md = $~ - md.string.should == str - offsets << md.offset(0) - str + md = $~ + md.string.should == str + offsets << md.offset(0) + str end.should == "hhellollo" offsets.should == [[1, 2]] @@ -299,6 +300,27 @@ describe "String#sub! with pattern, replacement" do -> { s.sub!(/e/, "e") }.should raise_error(FrozenError) -> { s.sub!(/[aeiou]/, '*') }.should raise_error(FrozenError) end + + it "handles a pattern in a superset encoding" do + string = 'abc'.force_encoding(Encoding::US_ASCII) + + result = string.sub!('é', 'è') + + result.should == nil + string.should == 'abc' + string.encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + string = 'été' + pattern = 't'.force_encoding(Encoding::US_ASCII) + + result = string.sub!(pattern, 'u') + + result.should == string + string.should == 'éué' + string.encoding.should == Encoding::UTF_8 + end end describe "String#sub! with pattern and block" do @@ -317,10 +339,10 @@ describe "String#sub! with pattern and block" do offsets = [] str.dup.sub!(/([aeiou])/) do - md = $~ - md.string.should == str - offsets << md.offset(0) - str + md = $~ + md.string.should == str + offsets << md.offset(0) + str end.should == "hhellollo" offsets.should == [[1, 2]] diff --git a/spec/ruby/core/string/swapcase_spec.rb b/spec/ruby/core/string/swapcase_spec.rb index 6307a1eaaf..011a213501 100644 --- a/spec/ruby/core/string/swapcase_spec.rb +++ b/spec/ruby/core/string/swapcase_spec.rb @@ -1,12 +1,17 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' describe "String#swapcase" do it "returns a new string with all uppercase chars from self converted to lowercase and vice versa" do - "Hello".swapcase.should == "hELLO" - "cYbEr_PuNk11".swapcase.should == "CyBeR_pUnK11" - "+++---111222???".swapcase.should == "+++---111222???" + "Hello".swapcase.should == "hELLO" + "cYbEr_PuNk11".swapcase.should == "CyBeR_pUnK11" + "+++---111222???".swapcase.should == "+++---111222???" + end + + it "returns a String in the same encoding as self" do + "Hello".encode("US-ASCII").swapcase.encoding.should == Encoding::US_ASCII end describe "full Unicode case mapping" do @@ -70,18 +75,9 @@ describe "String#swapcase" do -> { "abc".swapcase(:invalid_option) }.should raise_error(ArgumentError) end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - StringSpecs::MyString.new("").swapcase.should be_an_instance_of(StringSpecs::MyString) - StringSpecs::MyString.new("hello").swapcase.should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new("").swapcase.should be_an_instance_of(String) - StringSpecs::MyString.new("hello").swapcase.should be_an_instance_of(String) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("").swapcase.should be_an_instance_of(String) + StringSpecs::MyString.new("hello").swapcase.should be_an_instance_of(String) end end diff --git a/spec/ruby/core/string/to_c_spec.rb b/spec/ruby/core/string/to_c_spec.rb index 9c84b14f4d..1813890e72 100644 --- a/spec/ruby/core/string/to_c_spec.rb +++ b/spec/ruby/core/string/to_c_spec.rb @@ -1,99 +1,53 @@ require_relative '../../spec_helper' +require_relative '../../shared/kernel/complex' +require_relative 'fixtures/to_c' describe "String#to_c" do - it "returns a Complex object" do - '9'.to_c.should be_an_instance_of(Complex) - end - - it "understands integers" do - '20'.to_c.should == Complex(20) - end - - it "understands negative integers" do - '-3'.to_c.should == Complex(-3) - end - - it "understands fractions (numerator/denominator) for the real part" do - '2/3'.to_c.should == Complex(Rational(2, 3)) - end - - it "understands fractions (numerator/denominator) for the imaginary part" do - '4+2/3i'.to_c.should == Complex(4, Rational(2, 3)) - end - - it "understands negative fractions (-numerator/denominator) for the real part" do - '-2/3'.to_c.should == Complex(Rational(-2, 3)) - end - - it "understands negative fractions (-numerator/denominator) for the imaginary part" do - '7-2/3i'.to_c.should == Complex(7, Rational(-2, 3)) - end - - it "understands floats (a.b) for the real part" do - '2.3'.to_c.should == Complex(2.3) - end - - it "understands floats (a.b) for the imaginary part" do - '4+2.3i'.to_c.should == Complex(4, 2.3) - end - - it "understands negative floats (-a.b) for the real part" do - '-2.33'.to_c.should == Complex(-2.33) - end - - it "understands negative floats (-a.b) for the imaginary part" do - '7-28.771i'.to_c.should == Complex(7, -28.771) - end - - it "understands an integer followed by 'i' to mean that integer is the imaginary part" do - '35i'.to_c.should == Complex(0,35) - end - - it "understands a negative integer followed by 'i' to mean that negative integer is the imaginary part" do - '-29i'.to_c.should == Complex(0,-29) - end - - it "understands an 'i' by itself as denoting a complex number with an imaginary part of 1" do - 'i'.to_c.should == Complex(0,1) - end - - it "understands a '-i' by itself as denoting a complex number with an imaginary part of -1" do - '-i'.to_c.should == Complex(0,-1) - end - - it "understands 'a+bi' to mean a complex number with 'a' as the real part, 'b' as the imaginary" do - '79+4i'.to_c.should == Complex(79,4) - end + it_behaves_like :kernel_complex, :to_c_method, StringSpecs +end - it "understands 'a-bi' to mean a complex number with 'a' as the real part, '-b' as the imaginary" do - '79-4i'.to_c.should == Complex(79,-4) +describe "String#to_c" do + it "returns a complex number with 0 as the real part, 0 as the imaginary part for unrecognised Strings" do + 'ruby'.to_c.should == Complex(0, 0) end - it "understands scientific notation for the real part" do - '2e3+4i'.to_c.should == Complex(2e3,4) + it "ignores trailing garbage" do + '79+4iruby'.to_c.should == Complex(79, 4) + '7__9+4__0i'.to_c.should == Complex(7, 0) end - it "understands negative scientific notation for the real part" do - '-2e3+4i'.to_c.should == Complex(-2e3,4) - end + context "it treats special float value strings as characters" do + it "parses any string that starts with 'I' as 1i" do + 'Infinity'.to_c.should == Complex(0, 1) + '-Infinity'.to_c.should == Complex(0, -1) + 'Insecure'.to_c.should == Complex(0, 1) + '-Insecure'.to_c.should == Complex(0, -1) + end - it "understands scientific notation for the imaginary part" do - '4+2e3i'.to_c.should == Complex(4, 2e3) + it "does not parse any numeric information in 'NaN'" do + 'NaN'.to_c.should == Complex(0, 0) + end end - it "understands negative scientific notation for the imaginary part" do - '4-2e3i'.to_c.should == Complex(4, -2e3) + it "allows null-byte" do + "1-2i\0".to_c.should == Complex(1, -2) + "1\0-2i".to_c.should == Complex(1, 0) + "\01-2i".to_c.should == Complex(0, 0) end - it "understands scientific notation for the real and imaginary part in the same String" do - '2e3+2e4i'.to_c.should == Complex(2e3,2e4) + it "raises Encoding::CompatibilityError if String is in not ASCII-compatible encoding" do + -> { + '79+4i'.encode("UTF-16").to_c + }.should raise_error(Encoding::CompatibilityError, "ASCII incompatible encoding: UTF-16") end - it "understands negative scientific notation for the real and imaginary part in the same String" do - '-2e3-2e4i'.to_c.should == Complex(-2e3,-2e4) - end + it "treats a sequence of underscores as an end of Complex string" do + "5+3_1i".to_c.should == Complex(5, 31) + "5+3__1i".to_c.should == Complex(5) + "5+3___1i".to_c.should == Complex(5) - it "returns a complex number with 0 as the real part, 0 as the imaginary part for unrecognised Strings" do - 'ruby'.to_c.should == Complex(0,0) + "12_3".to_c.should == Complex(123) + "12__3".to_c.should == Complex(12) + "12___3".to_c.should == Complex(12) end end diff --git a/spec/ruby/core/string/to_f_spec.rb b/spec/ruby/core/string/to_f_spec.rb index cf64ecfc5d..abfd2517b6 100644 --- a/spec/ruby/core/string/to_f_spec.rb +++ b/spec/ruby/core/string/to_f_spec.rb @@ -5,16 +5,15 @@ require_relative 'fixtures/classes' describe "String#to_f" do it "treats leading characters of self as a floating point number" do - "123.45e1".to_f.should == 1234.5 - "45.67 degrees".to_f.should == 45.67 - "0".to_f.should == 0.0 - "123.45e1".to_f.should == 1234.5 + "123.45e1".to_f.should == 1234.5 + "45.67 degrees".to_f.should == 45.67 + "0".to_f.should == 0.0 - ".5".to_f.should == 0.5 - ".5e1".to_f.should == 5.0 - "5.".to_f.should == 5.0 - "5e".to_f.should == 5.0 - "5E".to_f.should == 5.0 + ".5".to_f.should == 0.5 + ".5e1".to_f.should == 5.0 + "5.".to_f.should == 5.0 + "5e".to_f.should == 5.0 + "5E".to_f.should == 5.0 end it "treats special float value strings as characters" do @@ -43,18 +42,39 @@ describe "String#to_f" do "1_234_567.890_1".to_f.should == 1_234_567.890_1 end - it "returns 0 for strings with any non-digit in them" do - "blah".to_f.should == 0 - "0b5".to_f.should == 0 - "0d5".to_f.should == 0 - "0o5".to_f.should == 0 - "0xx5".to_f.should == 0 - end - it "returns 0 for strings with leading underscores" do "_9".to_f.should == 0 end + it "stops if the underscore is not followed or preceded by a number" do + "1__2".to_f.should == 1.0 + "1_.2".to_f.should == 1.0 + "1._2".to_f.should == 1.0 + "1.2_e2".to_f.should == 1.2 + "1.2e_2".to_f.should == 1.2 + "1_x2".to_f.should == 1.0 + "1x_2".to_f.should == 1.0 + "+_1".to_f.should == 0.0 + "-_1".to_f.should == 0.0 + end + + it "does not allow prefixes to autodetect the base" do + "0b10".to_f.should == 0 + "010".to_f.should == 10 + "0o10".to_f.should == 0 + "0d10".to_f.should == 0 + "0x10".to_f.should == 0 + end + + it "treats any non-numeric character other than '.', 'e' and '_' as terminals" do + "blah".to_f.should == 0 + "1b5".to_f.should == 1 + "1d5".to_f.should == 1 + "1o5".to_f.should == 1 + "1xx5".to_f.should == 1 + "x5".to_f.should == 0 + end + it "takes an optional sign" do "-45.67 degrees".to_f.should == -45.67 "+45.67 degrees".to_f.should == 45.67 @@ -63,8 +83,60 @@ describe "String#to_f" do (1.0 / "-0".to_f).to_s.should == "-Infinity" end + it "treats a second 'e' as terminal" do + "1.234e1e2".to_f.should == 1.234e1 + end + + it "treats a second '.' as terminal" do + "1.2.3".to_f.should == 1.2 + end + + it "treats a '.' after an 'e' as terminal" do + "1.234e1.9".to_f.should == 1.234e1 + end + it "returns 0.0 if the conversion fails" do "bad".to_f.should == 0.0 "thx1138".to_f.should == 0.0 end + + it "ignores leading and trailing whitespace" do + " 1.2".to_f.should == 1.2 + "1.2 ".to_f.should == 1.2 + " 1.2 ".to_f.should == 1.2 + "\t1.2".to_f.should == 1.2 + "\n1.2".to_f.should == 1.2 + "\v1.2".to_f.should == 1.2 + "\f1.2".to_f.should == 1.2 + "\r1.2".to_f.should == 1.2 + end + + it "treats non-printable ASCII characters as terminals" do + "\0001.2".to_f.should == 0 + "\0011.2".to_f.should == 0 + "\0371.2".to_f.should == 0 + "\1771.2".to_f.should == 0 + "\2001.2".b.to_f.should == 0 + "\3771.2".b.to_f.should == 0 + end + + ruby_version_is "3.2.3" do + it "raises Encoding::CompatibilityError if String is in not ASCII-compatible encoding" do + -> { + '1.2'.encode("UTF-16").to_f + }.should raise_error(Encoding::CompatibilityError, "ASCII incompatible encoding: UTF-16") + end + end + + it "allows String representation without a fractional part" do + "1.".to_f.should == 1.0 + "+1.".to_f.should == 1.0 + "-1.".to_f.should == -1.0 + "1.e+0".to_f.should == 1.0 + "1.e+0".to_f.should == 1.0 + + ruby_bug "#20705", ""..."3.4" do + "1.e-2".to_f.should be_close(0.01, TOLERANCE) + end + end end diff --git a/spec/ruby/core/string/to_i_spec.rb b/spec/ruby/core/string/to_i_spec.rb index e4fa89aab3..39f69acda3 100644 --- a/spec/ruby/core/string/to_i_spec.rb +++ b/spec/ruby/core/string/to_i_spec.rb @@ -10,6 +10,18 @@ describe "String#to_i" do "1_2_3asdf".to_i.should == 123 end + it "ignores multiple non-consecutive underscores when the first digit is 0" do + (2..16).each do |base| + "0_0_010".to_i(base).should == base; + end + end + + it "bails out at the first double underscore if the first digit is 0" do + (2..16).each do |base| + "010__1".to_i(base).should == base; + end + end + it "ignores leading whitespaces" do [ " 123", " 123", "\r\n\r\n123", "\t\t123", "\r\n\t\n123", " \t\n\r\t 123"].each do |str| diff --git a/spec/ruby/core/string/to_r_spec.rb b/spec/ruby/core/string/to_r_spec.rb index 7e1d635d3b..4ffbb10d98 100644 --- a/spec/ruby/core/string/to_r_spec.rb +++ b/spec/ruby/core/string/to_r_spec.rb @@ -33,6 +33,10 @@ describe "String#to_r" do "-20".to_r.should == Rational(-20, 1) end + it "accepts leading plus signs" do + "+20".to_r.should == Rational(20, 1) + end + it "does not treat a leading period without a numeric prefix as a decimal point" do ".9".to_r.should_not == Rational(8106479329266893, 9007199254740992) end diff --git a/spec/ruby/core/string/tr_s_spec.rb b/spec/ruby/core/string/tr_s_spec.rb index e1bb20ce35..dd72da440c 100644 --- a/spec/ruby/core/string/tr_s_spec.rb +++ b/spec/ruby/core/string/tr_s_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -17,6 +18,15 @@ describe "String#tr_s" do "hello ^--^".tr_s("---", "_").should == "hello ^_^" end + ruby_bug "#19769", ""..."3.3" do + it "accepts c1-c1 notation to denote range of one character" do + "hello".tr_s('e-e', 'x').should == "hxllo" + "123456789".tr_s("2-23","xy").should == "1xy456789" + "hello ^-^".tr_s("e-", "a-a_").should == "hallo ^_^" + "hello ^-^".tr_s("---o", "_a").should == "hella ^_^" + end + end + it "pads to_str with its last char if it is shorter than from_string" do "this".tr_s("this", "x").should == "x" end @@ -45,16 +55,8 @@ describe "String#tr_s" do "bla".tr_s(from_str, to_str).should == "BlA" end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - StringSpecs::MyString.new("hello").tr_s("e", "a").should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns String instances when called on a subclass" do - StringSpecs::MyString.new("hello").tr_s("e", "a").should be_an_instance_of(String) - end + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("hello").tr_s("e", "a").should be_an_instance_of(String) end # http://redmine.ruby-lang.org/issues/show/1839 diff --git a/spec/ruby/core/string/tr_spec.rb b/spec/ruby/core/string/tr_spec.rb index 72adb9f2eb..75841a974f 100644 --- a/spec/ruby/core/string/tr_spec.rb +++ b/spec/ruby/core/string/tr_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -16,6 +17,15 @@ describe "String#tr" do "hello ^-^".tr("---", "_").should == "hello ^_^" end + ruby_bug "#19769", ""..."3.3" do + it "accepts c1-c1 notation to denote range of one character" do + "hello".tr('e-e', 'x').should == "hxllo" + "123456789".tr("2-23","xy").should == "1xy456789" + "hello ^-^".tr("e-", "a-a_").should == "hallo ^_^" + "hello ^-^".tr("---o", "_a").should == "hella ^_^" + end + end + it "pads to_str with its last char if it is shorter than from_string" do "this".tr("this", "x").should == "xxxx" "hello".tr("a-z", "A-H.").should == "HE..." @@ -57,16 +67,8 @@ describe "String#tr" do "bla".tr(from_str, to_str).should == "BlA" end - ruby_version_is ''...'3.0' do - it "returns subclass instances when called on a subclass" do - StringSpecs::MyString.new("hello").tr("e", "a").should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns Stringinstances when called on a subclass" do - StringSpecs::MyString.new("hello").tr("e", "a").should be_an_instance_of(String) - end + it "returns Stringinstances when called on a subclass" do + StringSpecs::MyString.new("hello").tr("e", "a").should be_an_instance_of(String) end # http://redmine.ruby-lang.org/issues/show/1839 diff --git a/spec/ruby/core/string/try_convert_spec.rb b/spec/ruby/core/string/try_convert_spec.rb index 84415c4a75..72ce5dd8b2 100644 --- a/spec/ruby/core/string/try_convert_spec.rb +++ b/spec/ruby/core/string/try_convert_spec.rb @@ -39,7 +39,7 @@ describe "String.try_convert" do it "sends #to_str to the argument and raises TypeError if it's not a kind of String" do obj = mock("to_str") obj.should_receive(:to_str).and_return(Object.new) - -> { String.try_convert obj }.should raise_error(TypeError) + -> { String.try_convert obj }.should raise_error(TypeError, "can't convert MockObject to String (MockObject#to_str gives Object)") end it "does not rescue exceptions raised by #to_str" do diff --git a/spec/ruby/core/string/undump_spec.rb b/spec/ruby/core/string/undump_spec.rb index 08058d9bd1..6ff220161c 100644 --- a/spec/ruby/core/string/undump_spec.rb +++ b/spec/ruby/core/string/undump_spec.rb @@ -389,7 +389,7 @@ describe "String#undump" do '"\\bv".force_encoding("UTF-16BE")'.undump.should == "\u0876".encode('utf-16be') end - it "keeps origin encoding" do + it "returns a String in the same encoding as self" do '"foo"'.encode("ISO-8859-1").undump.encoding.should == Encoding::ISO_8859_1 '"foo"'.encode('windows-1251').undump.encoding.should == Encoding::Windows_1251 end diff --git a/spec/ruby/core/string/unicode_normalize_spec.rb b/spec/ruby/core/string/unicode_normalize_spec.rb index 6de7533fc7..2e7d22394a 100644 --- a/spec/ruby/core/string/unicode_normalize_spec.rb +++ b/spec/ruby/core/string/unicode_normalize_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' # Examples taken from http://www.unicode.org/reports/tr15/#Norm_Forms diff --git a/spec/ruby/core/string/unicode_normalized_spec.rb b/spec/ruby/core/string/unicode_normalized_spec.rb index 87f3740459..91cf2086b2 100644 --- a/spec/ruby/core/string/unicode_normalized_spec.rb +++ b/spec/ruby/core/string/unicode_normalized_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' describe "String#unicode_normalized?" do diff --git a/spec/ruby/core/string/unpack/a_spec.rb b/spec/ruby/core/string/unpack/a_spec.rb index 2d83b4c824..a68e842e15 100644 --- a/spec/ruby/core/string/unpack/a_spec.rb +++ b/spec/ruby/core/string/unpack/a_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../../spec_helper' require_relative '../fixtures/classes' require_relative 'shared/basic' @@ -31,7 +31,7 @@ describe "String#unpack with format 'A'" do end it "decodes into raw (ascii) string values" do - str = "str".force_encoding('UTF-8').unpack("A*")[0] + str = "str".dup.force_encoding('UTF-8').unpack("A*")[0] str.encoding.should == Encoding::BINARY end diff --git a/spec/ruby/core/string/unpack/at_spec.rb b/spec/ruby/core/string/unpack/at_spec.rb index 70b2389d69..d4133c23ee 100644 --- a/spec/ruby/core/string/unpack/at_spec.rb +++ b/spec/ruby/core/string/unpack/at_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../../spec_helper' require_relative '../fixtures/classes' require_relative 'shared/basic' diff --git a/spec/ruby/core/string/unpack/b_spec.rb b/spec/ruby/core/string/unpack/b_spec.rb index 1a838d6c7c..b088f901fc 100644 --- a/spec/ruby/core/string/unpack/b_spec.rb +++ b/spec/ruby/core/string/unpack/b_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../../spec_helper' require_relative '../fixtures/classes' require_relative 'shared/basic' @@ -86,13 +86,30 @@ describe "String#unpack with format 'B'" do ].should be_computed_by(:unpack, "BBB") end - it "ignores NULL bytes between directives" do - "\x80\x00".unpack("B\x00B").should == ["1", "0"] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "\x80\x00".unpack("B\x00B").should == ["1", "0"] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x80\x00".unpack("B\x00B") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do "\x80\x00".unpack("B B").should == ["1", "0"] end + + it "decodes into US-ASCII string values" do + str = "s".dup.force_encoding('UTF-8').unpack("B*")[0] + str.encoding.name.should == 'US-ASCII' + end end describe "String#unpack with format 'b'" do @@ -177,8 +194,20 @@ describe "String#unpack with format 'b'" do ].should be_computed_by(:unpack, "bbb") end - it "ignores NULL bytes between directives" do - "\x01\x00".unpack("b\x00b").should == ["1", "0"] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "\x01\x00".unpack("b\x00b").should == ["1", "0"] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x00".unpack("b\x00b") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -186,8 +215,7 @@ describe "String#unpack with format 'b'" do end it "decodes into US-ASCII string values" do - str = "s".force_encoding('UTF-8').unpack("b*")[0] + str = "s".dup.force_encoding('UTF-8').unpack("b*")[0] str.encoding.name.should == 'US-ASCII' end - end diff --git a/spec/ruby/core/string/unpack/c_spec.rb b/spec/ruby/core/string/unpack/c_spec.rb index ed8caa4895..1e9548fb82 100644 --- a/spec/ruby/core/string/unpack/c_spec.rb +++ b/spec/ruby/core/string/unpack/c_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../../spec_helper' require_relative '../fixtures/classes' require_relative 'shared/basic' @@ -35,8 +35,20 @@ describe :string_unpack_8bit, shared: true do ].should be_computed_by(:unpack, unpack_format(3)) end - it "ignores NULL bytes between directives" do - "abc".unpack(unpack_format("\000", 2)).should == [97, 98] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "abc".unpack(unpack_format("\000", 2)).should == [97, 98] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/comment_spec.rb b/spec/ruby/core/string/unpack/comment_spec.rb index e18a53df3c..050d2b7fc0 100644 --- a/spec/ruby/core/string/unpack/comment_spec.rb +++ b/spec/ruby/core/string/unpack/comment_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../../spec_helper' require_relative '../fixtures/classes' diff --git a/spec/ruby/core/string/unpack/h_spec.rb b/spec/ruby/core/string/unpack/h_spec.rb index f2f5dcf396..535836087d 100644 --- a/spec/ruby/core/string/unpack/h_spec.rb +++ b/spec/ruby/core/string/unpack/h_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../../spec_helper' require_relative '../fixtures/classes' require_relative 'shared/basic' @@ -56,8 +56,20 @@ describe "String#unpack with format 'H'" do ].should be_computed_by(:unpack, "HHH") end - it "ignores NULL bytes between directives" do - "\x01\x10".unpack("H\x00H").should == ["0", "1"] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "\x01\x10".unpack("H\x00H").should == ["0", "1"] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x10".unpack("H\x00H") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -121,8 +133,20 @@ describe "String#unpack with format 'h'" do ].should be_computed_by(:unpack, "hhh") end - it "ignores NULL bytes between directives" do - "\x01\x10".unpack("h\x00h").should == ["1", "0"] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "\x01\x10".unpack("h\x00h").should == ["1", "0"] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x10".unpack("h\x00h") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/l_spec.rb b/spec/ruby/core/string/unpack/l_spec.rb index 18bb68b8d0..0adb567eca 100644 --- a/spec/ruby/core/string/unpack/l_spec.rb +++ b/spec/ruby/core/string/unpack/l_spec.rb @@ -14,7 +14,7 @@ describe "String#unpack with format 'L'" do it_behaves_like :string_unpack_32bit_be_unsigned, 'L>' end - platform_is wordsize: 32 do + platform_is c_long_size: 32 do describe "with modifier '<' and '_'" do it_behaves_like :string_unpack_32bit_le, 'L<_' it_behaves_like :string_unpack_32bit_le, 'L_<' @@ -44,7 +44,7 @@ describe "String#unpack with format 'L'" do end end - platform_is wordsize: 64 do + platform_is c_long_size: 64 do describe "with modifier '<' and '_'" do it_behaves_like :string_unpack_64bit_le, 'L<_' it_behaves_like :string_unpack_64bit_le, 'L_<' @@ -86,7 +86,7 @@ describe "String#unpack with format 'l'" do it_behaves_like :string_unpack_32bit_be_signed, 'l>' end - platform_is wordsize: 32 do + platform_is c_long_size: 32 do describe "with modifier '<' and '_'" do it_behaves_like :string_unpack_32bit_le, 'l<_' it_behaves_like :string_unpack_32bit_le, 'l_<' @@ -116,7 +116,7 @@ describe "String#unpack with format 'l'" do end end - platform_is wordsize: 64 do + platform_is c_long_size: 64 do describe "with modifier '<' and '_'" do it_behaves_like :string_unpack_64bit_le, 'l<_' it_behaves_like :string_unpack_64bit_le, 'l_<' @@ -160,7 +160,7 @@ little_endian do it_behaves_like :string_unpack_32bit_le_signed, 'l' end - platform_is wordsize: 32 do + platform_is c_long_size: 32 do describe "String#unpack with format 'L' with modifier '_'" do it_behaves_like :string_unpack_32bit_le, 'L_' it_behaves_like :string_unpack_32bit_le_unsigned, 'L_' @@ -182,7 +182,7 @@ little_endian do end end - platform_is wordsize: 64 do + platform_is c_long_size: 64 do describe "String#unpack with format 'L' with modifier '_'" do it_behaves_like :string_unpack_64bit_le, 'L_' it_behaves_like :string_unpack_64bit_le_unsigned, 'L_' @@ -218,7 +218,7 @@ big_endian do it_behaves_like :string_unpack_32bit_be_signed, 'l' end - platform_is wordsize: 32 do + platform_is c_long_size: 32 do describe "String#unpack with format 'L' with modifier '_'" do it_behaves_like :string_unpack_32bit_be, 'L_' it_behaves_like :string_unpack_32bit_be_unsigned, 'L_' @@ -240,7 +240,7 @@ big_endian do end end - platform_is wordsize: 64 do + platform_is c_long_size: 64 do describe "String#unpack with format 'L' with modifier '_'" do it_behaves_like :string_unpack_64bit_be, 'L_' it_behaves_like :string_unpack_64bit_be_unsigned, 'L_' diff --git a/spec/ruby/core/string/unpack/m_spec.rb b/spec/ruby/core/string/unpack/m_spec.rb index 21134514a1..357987a053 100644 --- a/spec/ruby/core/string/unpack/m_spec.rb +++ b/spec/ruby/core/string/unpack/m_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../../spec_helper' require_relative '../fixtures/classes' require_relative 'shared/basic' @@ -97,6 +97,11 @@ describe "String#unpack with format 'M'" do ["=FF=\n", ["\xff"]] ].should be_computed_by(:unpack, "M") end + + it "unpacks incomplete escape sequences as literal characters" do + "foo=".unpack("M").should == ["foo="] + "foo=4".unpack("M").should == ["foo=4"] + end end describe "String#unpack with format 'm'" do diff --git a/spec/ruby/core/string/unpack/shared/basic.rb b/spec/ruby/core/string/unpack/shared/basic.rb index f636f4689f..734630bda0 100644 --- a/spec/ruby/core/string/unpack/shared/basic.rb +++ b/spec/ruby/core/string/unpack/shared/basic.rb @@ -9,17 +9,19 @@ describe :string_unpack_basic, shared: true do "abc".unpack(d).should be_an_instance_of(Array) end - it "raises a TypeError when passed nil" do - -> { "abc".unpack(nil) }.should raise_error(TypeError) - end - - it "raises a TypeError when passed an Integer" do - -> { "abc".unpack(1) }.should raise_error(TypeError) + ruby_version_is ""..."3.3" do + it "warns about using an unknown directive" do + -> { "abcdefgh".unpack("a R" + unpack_format) }.should complain(/unknown unpack directive 'R' in 'a R#{unpack_format}'/) + -> { "abcdefgh".unpack("a 0" + unpack_format) }.should complain(/unknown unpack directive '0' in 'a 0#{unpack_format}'/) + -> { "abcdefgh".unpack("a :" + unpack_format) }.should complain(/unknown unpack directive ':' in 'a :#{unpack_format}'/) + end end - ruby_version_is "3.1" do - it "starts unpacking from the given offset" do - "abc".unpack("CC", offset: 1).should == [98, 99] + ruby_version_is "3.3" do + it "raises ArgumentError when a directive is unknown" do + -> { "abcdefgh".unpack("a K" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive 'K' in 'a K#{unpack_format}'") + -> { "abcdefgh".unpack("a 0" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive '0' in 'a 0#{unpack_format}'") + -> { "abcdefgh".unpack("a :" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive ':' in 'a :#{unpack_format}'") end end end @@ -32,18 +34,4 @@ describe :string_unpack_no_platform, shared: true do it "raises an ArgumentError when the format modifier is '!'" do -> { "abcdefgh".unpack(unpack_format("!")) }.should raise_error(ArgumentError) end - - ruby_version_is "3.1" do - it "raises an ArgumentError when the offset is negative" do - -> { "a".unpack("C", offset: -1) }.should raise_error(ArgumentError) - end - - it "returns nil if the offset is at the end of the string" do - "a".unpack("C", offset: 1).should == [nil] - end - - it "raises an ArgumentError when the offset is larget than the string" do - -> { "a".unpack("C", offset: 2) }.should raise_error(ArgumentError) - end - end end diff --git a/spec/ruby/core/string/unpack/shared/float.rb b/spec/ruby/core/string/unpack/shared/float.rb index 99bd8a3401..b31c2c8bdc 100644 --- a/spec/ruby/core/string/unpack/shared/float.rb +++ b/spec/ruby/core/string/unpack/shared/float.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary describe :string_unpack_float_le, shared: true do it "decodes one float for a single format character" do @@ -56,9 +56,21 @@ describe :string_unpack_float_le, shared: true do [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - it "ignores NULL bytes between directives" do - array = "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2)) - array.should == [2.9000000953674316, 1.399999976158142] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + array = "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2)) + array.should == [2.9000000953674316, 1.399999976158142] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -123,9 +135,21 @@ describe :string_unpack_float_be, shared: true do [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - it "ignores NULL bytes between directives" do - array = "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2)) - array.should == [2.9000000953674316, 1.399999976158142] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + array = "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2)) + array.should == [2.9000000953674316, 1.399999976158142] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -193,8 +217,20 @@ describe :string_unpack_double_le, shared: true do [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - it "ignores NULL bytes between directives" do - "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)).should == [2.9, 1.4] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)).should == [2.9, 1.4] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -261,8 +297,20 @@ describe :string_unpack_double_be, shared: true do [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true end - it "ignores NULL bytes between directives" do - "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)).should == [2.9, 1.4] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)).should == [2.9, 1.4] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/shared/integer.rb b/spec/ruby/core/string/unpack/shared/integer.rb index cbaa743683..d3934753ba 100644 --- a/spec/ruby/core/string/unpack/shared/integer.rb +++ b/spec/ruby/core/string/unpack/shared/integer.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary describe :string_unpack_16bit_le, shared: true do it "decodes one short for a single format character" do @@ -32,8 +32,20 @@ describe :string_unpack_16bit_le, shared: true do ].should be_computed_by(:unpack, unpack_format(3)) end - it "ignores NULL bytes between directives" do - "abcd".unpack(unpack_format("\000", 2)).should == [25185, 25699] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "abcd".unpack(unpack_format("\000", 2)).should == [25185, 25699] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abcd".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -85,8 +97,20 @@ describe :string_unpack_16bit_be, shared: true do ].should be_computed_by(:unpack, unpack_format(3)) end - it "ignores NULL bytes between directives" do - "badc".unpack(unpack_format("\000", 2)).should == [25185, 25699] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "badc".unpack(unpack_format("\000", 2)).should == [25185, 25699] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "badc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -139,8 +163,20 @@ describe :string_unpack_32bit_le, shared: true do ].should be_computed_by(:unpack, unpack_format(3)) end - it "ignores NULL bytes between directives" do - "abcdefgh".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "abcdefgh".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abcdefgh".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -193,8 +229,20 @@ describe :string_unpack_32bit_be, shared: true do ].should be_computed_by(:unpack, unpack_format(3)) end - it "ignores NULL bytes between directives" do - "dcbahgfe".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "dcbahgfe".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "dcbahgfe".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -243,9 +291,21 @@ describe :string_unpack_64bit_le, shared: true do "abc".unpack(unpack_format('*')).should == [] end - it "ignores NULL bytes between directives" do - array = "abcdefghabghefcd".unpack(unpack_format("\000", 2)) - array.should == [7523094288207667809, 7233738012216484449] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + array = "abcdefghabghefcd".unpack(unpack_format("\000", 2)) + array.should == [7523094288207667809, 7233738012216484449] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "badc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do @@ -305,9 +365,21 @@ describe :string_unpack_64bit_be, shared: true do "abc".unpack(unpack_format('*')).should == [] end - it "ignores NULL bytes between directives" do - array = "hgfedcbadcfehgba".unpack(unpack_format("\000", 2)) - array.should == [7523094288207667809, 7233738012216484449] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + array = "hgfedcbadcfehgba".unpack(unpack_format("\000", 2)) + array.should == [7523094288207667809, 7233738012216484449] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "hgfedcbadcfehgba".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/shared/unicode.rb b/spec/ruby/core/string/unpack/shared/unicode.rb index a2b4e142b2..9fe07f53ae 100644 --- a/spec/ruby/core/string/unpack/shared/unicode.rb +++ b/spec/ruby/core/string/unpack/shared/unicode.rb @@ -50,8 +50,20 @@ describe :string_unpack_unicode, shared: true do "\xc2\x80".unpack("UUUU").should == [0x80] end - it "ignores NULL bytes between directives" do - "\x01\x02".unpack("U\x00U").should == [1, 2] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "\x01\x02".unpack("U\x00U").should == [1, 2] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x02".unpack("U\x00U") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/u_spec.rb b/spec/ruby/core/string/unpack/u_spec.rb index 7845e6d5f2..68c8f6f11c 100644 --- a/spec/ruby/core/string/unpack/u_spec.rb +++ b/spec/ruby/core/string/unpack/u_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../../spec_helper' require_relative '../fixtures/classes' require_relative 'shared/basic' @@ -33,7 +33,7 @@ describe "String#unpack with format 'u'" do str = "".unpack("u")[0] str.encoding.should == Encoding::BINARY - str = "1".force_encoding('UTF-8').unpack("u")[0] + str = "1".dup.force_encoding('UTF-8').unpack("u")[0] str.encoding.should == Encoding::BINARY end diff --git a/spec/ruby/core/string/unpack/w_spec.rb b/spec/ruby/core/string/unpack/w_spec.rb index 011c75f5c4..7d3533ccae 100644 --- a/spec/ruby/core/string/unpack/w_spec.rb +++ b/spec/ruby/core/string/unpack/w_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../../spec_helper' require_relative '../fixtures/classes' require_relative 'shared/basic' @@ -15,8 +15,20 @@ describe "String#unpack with directive 'w'" do ].should be_computed_by(:unpack, "w") end - it "ignores NULL bytes between directives" do - "\x01\x02\x03".unpack("w\x00w").should == [1, 2] + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "\x01\x02\x03".unpack("w\x00w").should == [1, 2] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x02\x03".unpack("w\x00w") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end end it "ignores spaces between directives" do diff --git a/spec/ruby/core/string/unpack/x_spec.rb b/spec/ruby/core/string/unpack/x_spec.rb index 5e248de77e..2926ebbe0f 100644 --- a/spec/ruby/core/string/unpack/x_spec.rb +++ b/spec/ruby/core/string/unpack/x_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../../spec_helper' require_relative '../fixtures/classes' require_relative 'shared/basic' diff --git a/spec/ruby/core/string/unpack/z_spec.rb b/spec/ruby/core/string/unpack/z_spec.rb index ce8da4b29e..1030390550 100644 --- a/spec/ruby/core/string/unpack/z_spec.rb +++ b/spec/ruby/core/string/unpack/z_spec.rb @@ -1,4 +1,4 @@ -# -*- encoding: binary -*- +# encoding: binary require_relative '../../../spec_helper' require_relative '../fixtures/classes' require_relative 'shared/basic' diff --git a/spec/ruby/core/string/unpack1_spec.rb b/spec/ruby/core/string/unpack1_spec.rb index f59bd92d6a..cfb47fe695 100644 --- a/spec/ruby/core/string/unpack1_spec.rb +++ b/spec/ruby/core/string/unpack1_spec.rb @@ -8,23 +8,40 @@ describe "String#unpack1" do "A".unpack1("B*").should == "01000001" end - ruby_version_is "3.1" do - it "starts unpacking from the given offset" do - "ZZABCD".unpack1('x3C', offset: 2).should == "ABCD".unpack('x3C')[0] - "ZZZZaG9nZWZ1Z2E=".unpack1("m", offset: 4).should == "hogefuga" - "ZA".unpack1("B*", offset: 1).should == "01000001" - end + it "starts unpacking from the given offset" do + "ZZABCD".unpack1('x3C', offset: 2).should == "ABCD".unpack('x3C')[0] + "ZZZZaG9nZWZ1Z2E=".unpack1("m", offset: 4).should == "hogefuga" + "ZA".unpack1("B*", offset: 1).should == "01000001" + end - it "raises an ArgumentError when the offset is negative" do - -> { "a".unpack1("C", offset: -1) }.should raise_error(ArgumentError) - end + it "traits offset as a bytes offset" do + "؈".unpack("CC").should == [216, 136] + "؈".unpack1("C").should == 216 + "؈".unpack1("C", offset: 1).should == 136 + end + + it "raises an ArgumentError when the offset is negative" do + -> { "a".unpack1("C", offset: -1) }.should raise_error(ArgumentError, "offset can't be negative") + end + + it "returns nil if the offset is at the end of the string" do + "a".unpack1("C", offset: 1).should == nil + end + + it "raises an ArgumentError when the offset is larger than the string bytesize" do + -> { "a".unpack1("C", offset: 2) }.should raise_error(ArgumentError, "offset outside of string") + end + + context "with format 'm0'" do + # unpack1("m0") takes a special code path that calls Pack.unpackBase46Strict instead of Pack.unpack_m, + # which is why we repeat the tests for unpack("m0") here. - it "returns nil if the offset is at the end of the string" do - "a".unpack1("C", offset: 1).should == nil + it "decodes base64" do + "dGVzdA==".unpack1("m0").should == "test" end - it "raises an ArgumentError when the offset is larget than the string" do - -> { "a".unpack1("C", offset: 2) }.should raise_error(ArgumentError) + it "raises an ArgumentError for an invalid base64 character" do + -> { "dGV%zdA==".unpack1("m0") }.should raise_error(ArgumentError) end end end diff --git a/spec/ruby/core/string/unpack_spec.rb b/spec/ruby/core/string/unpack_spec.rb new file mode 100644 index 0000000000..a0abf8fa99 --- /dev/null +++ b/spec/ruby/core/string/unpack_spec.rb @@ -0,0 +1,32 @@ +require_relative '../../spec_helper' + +describe "String#unpack" do + it "raises a TypeError when passed nil" do + -> { "abc".unpack(nil) }.should raise_error(TypeError) + end + + it "raises a TypeError when passed an Integer" do + -> { "abc".unpack(1) }.should raise_error(TypeError) + end + + it "starts unpacking from the given offset" do + "abc".unpack("CC", offset: 1).should == [98, 99] + end + + it "traits offset as a bytes offset" do + "؈".unpack("CC").should == [216, 136] + "؈".unpack("CC", offset: 1).should == [136, nil] + end + + it "raises an ArgumentError when the offset is negative" do + -> { "a".unpack("C", offset: -1) }.should raise_error(ArgumentError, "offset can't be negative") + end + + it "returns nil if the offset is at the end of the string" do + "a".unpack("C", offset: 1).should == [nil] + end + + it "raises an ArgumentError when the offset is larger than the string" do + -> { "a".unpack("C", offset: 2) }.should raise_error(ArgumentError, "offset outside of string") + end +end diff --git a/spec/ruby/core/string/upcase_spec.rb b/spec/ruby/core/string/upcase_spec.rb index 209fe73b6e..652de5c2ef 100644 --- a/spec/ruby/core/string/upcase_spec.rb +++ b/spec/ruby/core/string/upcase_spec.rb @@ -1,4 +1,5 @@ # -*- encoding: utf-8 -*- +# frozen_string_literal: false require_relative '../../spec_helper' require_relative 'fixtures/classes' @@ -8,6 +9,10 @@ describe "String#upcase" do "hello".upcase.should == "HELLO" end + it "returns a String in the same encoding as self" do + "hello".encode("US-ASCII").upcase.encoding.should == Encoding::US_ASCII + end + describe "full Unicode case mapping" do it "works for all of Unicode with no option" do "äöü".upcase.should == "ÄÖÜ" @@ -69,16 +74,8 @@ describe "String#upcase" do -> { "abc".upcase(:invalid_option) }.should raise_error(ArgumentError) end - ruby_version_is ''...'3.0' do - it "returns a subclass instance for subclasses" do - StringSpecs::MyString.new("fooBAR").upcase.should be_an_instance_of(StringSpecs::MyString) - end - end - - ruby_version_is '3.0' do - it "returns a String instance for subclasses" do - StringSpecs::MyString.new("fooBAR").upcase.should be_an_instance_of(String) - end + it "returns a String instance for subclasses" do + StringSpecs::MyString.new("fooBAR").upcase.should be_an_instance_of(String) end end diff --git a/spec/ruby/core/string/uplus_spec.rb b/spec/ruby/core/string/uplus_spec.rb index 038b283c90..20767bcc01 100644 --- a/spec/ruby/core/string/uplus_spec.rb +++ b/spec/ruby/core/string/uplus_spec.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: false require_relative '../../spec_helper' describe 'String#+@' do @@ -7,16 +8,53 @@ describe 'String#+@' do output.should_not.frozen? output.should == 'foo' + + output << 'bar' + output.should == 'foobar' end - it 'returns self if the String is not frozen' do - input = 'foo' + it 'returns a mutable String itself' do + input = String.new("foo") output = +input - output.equal?(input).should == true + output.should.equal?(input) + + input << "bar" + output.should == "foobar" + end + + context 'if file has "frozen_string_literal: true" magic comment' do + it 'returns mutable copy of a literal' do + ruby_exe(fixture(__FILE__, "freeze_magic_comment.rb")).should == 'mutable' + end end - it 'returns mutable copy despite freeze-magic-comment in file' do - ruby_exe(fixture(__FILE__, "freeze_magic_comment.rb")).should == 'mutable' + context 'if file has "frozen_string_literal: false" magic comment' do + it 'returns literal string itself' do + input = 'foo' + output = +input + + output.equal?(input).should == true + end + end + + context 'if file has no frozen_string_literal magic comment' do + ruby_version_is ''...'3.4' do + it 'returns literal string itself' do + eval(<<~RUBY).should == true + s = "foo" + s.equal?(+s) + RUBY + end + end + + ruby_version_is '3.4' do + it 'returns mutable copy of a literal' do + eval(<<~RUBY).should == false + s = "foo" + s.equal?(+s) + RUBY + end + end end end diff --git a/spec/ruby/core/string/upto_spec.rb b/spec/ruby/core/string/upto_spec.rb index f8529b1d2b..8bc847d5ac 100644 --- a/spec/ruby/core/string/upto_spec.rb +++ b/spec/ruby/core/string/upto_spec.rb @@ -80,6 +80,12 @@ describe "String#upto" do a.should == ["Σ", "Τ", "Υ", "Φ", "Χ", "Ψ", "Ω"] end + it "raises Encoding::CompatibilityError when incompatible characters are given" do + char1 = 'a'.dup.force_encoding("EUC-JP") + char2 = 'b'.dup.force_encoding("ISO-2022-JP") + -> { char1.upto(char2) {} }.should raise_error(Encoding::CompatibilityError, "incompatible character encodings: EUC-JP and ISO-2022-JP") + end + describe "on sequence of numbers" do it "calls the block as Integer#upto" do "8".upto("11").to_a.should == 8.upto(11).map(&:to_s) diff --git a/spec/ruby/core/string/valid_encoding/utf_8_spec.rb b/spec/ruby/core/string/valid_encoding/utf_8_spec.rb new file mode 100644 index 0000000000..a14c3af830 --- /dev/null +++ b/spec/ruby/core/string/valid_encoding/utf_8_spec.rb @@ -0,0 +1,214 @@ +# -*- encoding: utf-8 -*- +require_relative '../../../spec_helper' + +describe "String#valid_encoding? and UTF-8" do + def utf8(bytes) + bytes.pack("C*").force_encoding("UTF-8") + end + + describe "1-byte character" do + it "is valid if is in format 0xxxxxxx" do + utf8([0b00000000]).valid_encoding?.should == true + utf8([0b01111111]).valid_encoding?.should == true + end + + it "is not valid if is not in format 0xxxxxxx" do + utf8([0b10000000]).valid_encoding?.should == false + utf8([0b11111111]).valid_encoding?.should == false + end + end + + describe "2-bytes character" do + it "is valid if in format [110xxxxx 10xxxxx]" do + utf8([0b11000010, 0b10000000]).valid_encoding?.should == true + utf8([0b11000010, 0b10111111]).valid_encoding?.should == true + + utf8([0b11011111, 0b10000000]).valid_encoding?.should == true + utf8([0b11011111, 0b10111111]).valid_encoding?.should == true + end + + it "is not valid if the first byte is not in format 110xxxxx" do + utf8([0b00000010, 0b10000000]).valid_encoding?.should == false + utf8([0b00100010, 0b10000000]).valid_encoding?.should == false + utf8([0b01000010, 0b10000000]).valid_encoding?.should == false + utf8([0b01100010, 0b10000000]).valid_encoding?.should == false + utf8([0b10000010, 0b10000000]).valid_encoding?.should == false + utf8([0b10100010, 0b10000000]).valid_encoding?.should == false + utf8([0b11000010, 0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11100010, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the second byte is not in format 10xxxxxx" do + utf8([0b11000010, 0b00000000]).valid_encoding?.should == false + utf8([0b11000010, 0b01000000]).valid_encoding?.should == false + utf8([0b11000010, 0b11000000]).valid_encoding?.should == false + end + + it "is not valid if is smaller than [xxxxxx10 xx000000] (codepoints < U+007F, that are encoded with the 1-byte format)" do + utf8([0b11000000, 0b10111111]).valid_encoding?.should == false + utf8([0b11000001, 0b10111111]).valid_encoding?.should == false + end + + it "is not valid if the first byte is missing" do + bytes = [0b11000010, 0b10000000] + utf8(bytes[1..1]).valid_encoding?.should == false + end + + it "is not valid if the second byte is missing" do + bytes = [0b11000010, 0b10000000] + utf8(bytes[0..0]).valid_encoding?.should == false + end + end + + describe "3-bytes character" do + it "is valid if in format [1110xxxx 10xxxxxx 10xxxxxx]" do + utf8([0b11100000, 0b10100000, 0b10000000]).valid_encoding?.should == true + utf8([0b11100000, 0b10100000, 0b10111111]).valid_encoding?.should == true + utf8([0b11100000, 0b10111111, 0b10111111]).valid_encoding?.should == true + utf8([0b11101111, 0b10111111, 0b10111111]).valid_encoding?.should == true + end + + it "is not valid if the first byte is not in format 1110xxxx" do + utf8([0b00000000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b00010000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b00100000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b00110000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b01000000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b01010000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b01100000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b01110000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b10000000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b10010000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b10100000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b10110000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11000000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11010000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10100000, 0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11110000, 0b10100000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the second byte is not in format 10xxxxxx" do + utf8([0b11100000, 0b00100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b01100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b11100000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the third byte is not in format 10xxxxxx" do + utf8([0b11100000, 0b10100000, 0b00000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10100000, 0b01000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10100000, 0b01000000]).valid_encoding?.should == false + end + + it "is not valid if is smaller than [xxxx0000 xx100000 xx000000] (codepoints < U+07FF that are encoded with the 2-byte format)" do + utf8([0b11100000, 0b10010000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10001000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10000100, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10000010, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10000001, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if in range [xxxx1101 xx100000 xx000000] - [xxxx1101 xx111111 xx111111] (codepoints U+D800 - U+DFFF)" do + utf8([0b11101101, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11101101, 0b10100000, 0b10000001]).valid_encoding?.should == false + utf8([0b11101101, 0b10111111, 0b10111111]).valid_encoding?.should == false + + utf8([0b11101101, 0b10011111, 0b10111111]).valid_encoding?.should == true # lower boundary - 1 + utf8([0b11101110, 0b10000000, 0b10000000]).valid_encoding?.should == true # upper boundary + 1 + end + + it "is not valid if the first byte is missing" do + bytes = [0b11100000, 0b10100000, 0b10000000] + utf8(bytes[2..3]).valid_encoding?.should == false + end + + it "is not valid if the second byte is missing" do + bytes = [0b11100000, 0b10100000, 0b10000000] + utf8([bytes[0], bytes[2]]).valid_encoding?.should == false + end + + it "is not valid if the second and the third bytes are missing" do + bytes = [0b11100000, 0b10100000, 0b10000000] + utf8(bytes[0..0]).valid_encoding?.should == false + end + end + + describe "4-bytes character" do + it "is valid if in format [11110xxx 10xxxxxx 10xxxxxx 10xxxxxx]" do + utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true + utf8([0b11110000, 0b10010000, 0b10000000, 0b10111111]).valid_encoding?.should == true + utf8([0b11110000, 0b10010000, 0b10111111, 0b10111111]).valid_encoding?.should == true + utf8([0b11110000, 0b10111111, 0b10111111, 0b10111111]).valid_encoding?.should == true + utf8([0b11110100, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == true + end + + it "is not valid if the first byte is not in format 11110xxx" do + utf8([0b11100000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11010000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b10110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b01110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the second byte is not in format 10xxxxxx" do + utf8([0b11110000, 0b00010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b01010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11110000, 0b11010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the third byte is not in format 10xxxxxx" do + utf8([0b11110000, 0b10010000, 0b00000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b01000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11110000, 0b10010000, 0b11000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the forth byte is not in format 10xxxxxx" do + utf8([0b11110000, 0b10010000, 0b10000000, 0b00000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b10000000, 0b01000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11110000, 0b10010000, 0b10000000, 0b11000000]).valid_encoding?.should == false + end + + it "is not valid if is smaller than [xxxxx000 xx001000 xx000000 xx000000] (codepoint < U+10000)" do + utf8([0b11110000, 0b10000111, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000110, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000101, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000100, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000011, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000010, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000001, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000000, 0b10000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if is greater than [xxxxx100 xx001111 xx111111 xx111111] (codepoint > U+10FFFF)" do + utf8([0b11110100, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110100, 0b10100000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110100, 0b10110000, 0b10000000, 0b10000000]).valid_encoding?.should == false + + utf8([0b11110101, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false + utf8([0b11110110, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false + utf8([0b11110111, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false + end + + it "is not valid if the first byte is missing" do + bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000] + utf8(bytes[1..3]).valid_encoding?.should == false + end + + it "is not valid if the second byte is missing" do + bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000] + utf8([bytes[0], bytes[2], bytes[3]]).valid_encoding?.should == false + end + + it "is not valid if the second and the third bytes are missing" do + bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000] + utf8([bytes[0], bytes[3]]).valid_encoding?.should == false + end + + it "is not valid if the second, the third and the fourth bytes are missing" do + bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000] + utf8(bytes[0..0]).valid_encoding?.should == false + end + end +end diff --git a/spec/ruby/core/string/valid_encoding_spec.rb b/spec/ruby/core/string/valid_encoding_spec.rb index be7cef7a8e..375035cd94 100644 --- a/spec/ruby/core/string/valid_encoding_spec.rb +++ b/spec/ruby/core/string/valid_encoding_spec.rb @@ -7,13 +7,13 @@ describe "String#valid_encoding?" do end it "returns true if self is valid in the current encoding and other encodings" do - str = "\x77" + str = +"\x77" str.force_encoding('utf-8').valid_encoding?.should be_true str.force_encoding('binary').valid_encoding?.should be_true end it "returns true for all encodings self is valid in" do - str = "\xE6\x9D\x94" + str = +"\xE6\x9D\x94" str.force_encoding('BINARY').valid_encoding?.should be_true str.force_encoding('UTF-8').valid_encoding?.should be_true str.force_encoding('US-ASCII').valid_encoding?.should be_false @@ -43,10 +43,10 @@ describe "String#valid_encoding?" do str.force_encoding('KOI8-R').valid_encoding?.should be_true str.force_encoding('KOI8-U').valid_encoding?.should be_true str.force_encoding('Shift_JIS').valid_encoding?.should be_false - "\xD8\x00".force_encoding('UTF-16BE').valid_encoding?.should be_false - "\x00\xD8".force_encoding('UTF-16LE').valid_encoding?.should be_false - "\x04\x03\x02\x01".force_encoding('UTF-32BE').valid_encoding?.should be_false - "\x01\x02\x03\x04".force_encoding('UTF-32LE').valid_encoding?.should be_false + "\xD8\x00".dup.force_encoding('UTF-16BE').valid_encoding?.should be_false + "\x00\xD8".dup.force_encoding('UTF-16LE').valid_encoding?.should be_false + "\x04\x03\x02\x01".dup.force_encoding('UTF-32BE').valid_encoding?.should be_false + "\x01\x02\x03\x04".dup.force_encoding('UTF-32LE').valid_encoding?.should be_false str.force_encoding('Windows-1251').valid_encoding?.should be_true str.force_encoding('IBM437').valid_encoding?.should be_true str.force_encoding('IBM737').valid_encoding?.should be_true @@ -100,27 +100,25 @@ describe "String#valid_encoding?" do str.force_encoding('UTF8-MAC').valid_encoding?.should be_true end - ruby_version_is '3.0' do - it "returns true for IBM720 encoding self is valid in" do - str = "\xE6\x9D\x94" - str.force_encoding('IBM720').valid_encoding?.should be_true - str.force_encoding('CP720').valid_encoding?.should be_true - end + it "returns true for IBM720 encoding self is valid in" do + str = +"\xE6\x9D\x94" + str.force_encoding('IBM720').valid_encoding?.should be_true + str.force_encoding('CP720').valid_encoding?.should be_true end it "returns false if self is valid in one encoding, but invalid in the one it's tagged with" do - str = "\u{8765}" + str = +"\u{8765}" str.valid_encoding?.should be_true - str = str.force_encoding('ascii') + str.force_encoding('ascii') str.valid_encoding?.should be_false end it "returns false if self contains a character invalid in the associated encoding" do - "abc#{[0x80].pack('C')}".force_encoding('ascii').valid_encoding?.should be_false + "abc#{[0x80].pack('C')}".dup.force_encoding('ascii').valid_encoding?.should be_false end it "returns false if a valid String had an invalid character appended to it" do - str = "a" + str = +"a" str.valid_encoding?.should be_true str << [0xDD].pack('C').force_encoding('utf-8') str.valid_encoding?.should be_false |
