diff options
Diffstat (limited to 'spec/ruby/core/string')
170 files changed, 18923 insertions, 0 deletions
diff --git a/spec/ruby/core/string/allocate_spec.rb b/spec/ruby/core/string/allocate_spec.rb
new file mode 100644
index 0000000000..30d5f60594
--- /dev/null
+++ b/spec/ruby/core/string/allocate_spec.rb
@@ -0,0 +1,19 @@
+require_relative '../../spec_helper'
+
+describe "String.allocate" do
+  it "returns an instance of String" do
+    str = String.allocate
+    str.should be_an_instance_of(String)
+  end
+
+  it "returns a fully-formed String" do
+    str = String.allocate
+    str.size.should == 0
+    str << "more"
+    str.should == "more"
+  end
+
+  it "returns a binary String" do
+    String.allocate.encoding.should == Encoding::BINARY
+  end
+end
diff --git a/spec/ruby/core/string/append_as_bytes_spec.rb b/spec/ruby/core/string/append_as_bytes_spec.rb
new file mode 100644
index 0000000000..b1703e5f89
--- /dev/null
+++ b/spec/ruby/core/string/append_as_bytes_spec.rb
@@ -0,0 +1,58 @@
+require_relative '../../spec_helper'
+
+describe "String#append_as_bytes" do
+  ruby_version_is "3.4" do
+    it "doesn't allow to mutate frozen strings" do
+      str = "hello".freeze
+      -> { str.append_as_bytes("\xE2\x82") }.should raise_error(FrozenError)
+    end
+
+    it "allows creating broken strings" do
+      str = +"hello"
+      str.append_as_bytes("\xE2\x82") # first two bytes of the UTF-8 sequence E2 82 AC
+      str.valid_encoding?.should == false
+
+      str.append_as_bytes("\xAC") # completes the sequence, so the string is valid again
+      str.valid_encoding?.should == true
+
+      str = "abc".encode(Encoding::UTF_32LE)
+      str.append_as_bytes("def")
+      str.encoding.should == Encoding::UTF_32LE
+      str.valid_encoding?.should == false
+    end
+
+    it "never changes the receiver encoding" do
+      str = "".b
+      str.append_as_bytes("€")
+      str.encoding.should == Encoding::BINARY
+    end
+
+    it "accepts variadic String or Integer arguments" do
+      str = "hello".b
+      str.append_as_bytes("\xE2\x82", 12, 43, "\xAC")
+      str.encoding.should == Encoding::BINARY
+      str.should == "hello\xE2\x82\f+\xAC".b # byte 12 is "\f", byte 43 is "+"
+    end
+
+    it "truncates integers to the least significant byte" do
+      str = +""
+      str.append_as_bytes(0x131, 0x232, 0x333, bignum_value, bignum_value(1))
+      str.bytes.should == [0x31, 0x32, 0x33, 0, 1]
+    end
+
+    it "wraps negative integers" do
+      str = "".b
+      str.append_as_bytes(-1, -bignum_value, -bignum_value(1))
+      str.bytes.should == [0xFF, 0, 0xFF]
+    end
+
+    it "only accepts strings or integers, and doesn't attempt to cast with #to_str or #to_int" do
+      to_str = mock("to_str")
+      to_str.should_not_receive(:to_str)
+      to_str.should_not_receive(:to_int)
+
+      str = +"hello"
+      -> { str.append_as_bytes(to_str) }.should raise_error(TypeError, "wrong argument type MockObject (expected String or Integer)")
+    end
+  end
+end
diff --git a/spec/ruby/core/string/append_spec.rb b/spec/ruby/core/string/append_spec.rb
new file mode 100644
index 0000000000..8497ce8262
--- /dev/null
+++ b/spec/ruby/core/string/append_spec.rb
@@ -0,0 +1,14 @@
+require_relative '../../spec_helper'
+require_relative 'fixtures/classes'
+require_relative 'shared/concat'
+
+describe "String#<<" do
+  it_behaves_like :string_concat, :<<
+  it_behaves_like :string_concat_encoding, :<<
+  it_behaves_like :string_concat_type_coercion, :<<
+
+  it "raises an ArgumentError when given the incorrect number of arguments" do
+    -> { "hello".send(:<<) }.should raise_error(ArgumentError)
+    -> { "hello".send(:<<, "one", "two") }.should raise_error(ArgumentError)
+  end
+end
diff --git a/spec/ruby/core/string/ascii_only_spec.rb b/spec/ruby/core/string/ascii_only_spec.rb
new file mode 100644
index 0000000000..88a0559cfd
--- /dev/null
+++ b/spec/ruby/core/string/ascii_only_spec.rb
@@ -0,0 +1,82 @@
+# -*- encoding: utf-8 -*-
+require_relative '../../spec_helper'
+require_relative 'fixtures/classes'
+
+describe "String#ascii_only?" do
+  describe "with ASCII only characters" do
+    it "returns true if the encoding is UTF-8" do
+      [ ["hello", true],
+        ["hello".encode('UTF-8'), true],
+        ["hello".dup.force_encoding('UTF-8'), true],
+      ].should be_computed_by(:ascii_only?)
+    end
+
+    it "returns true if the encoding is US-ASCII" do
+      "hello".dup.force_encoding(Encoding::US_ASCII).ascii_only?.should be_true
+      "hello".encode(Encoding::US_ASCII).ascii_only?.should be_true
+    end
+
+    it "returns true for all single-character UTF-8 Strings" do
+      0.upto(127) do |n|
+        n.chr.ascii_only?.should be_true
+      end
+    end
+  end
+
+  describe "with non-ASCII only characters" do
+    it "returns false if the encoding is BINARY" do
+      chr = 128.chr
+      chr.encoding.should == Encoding::BINARY
+      chr.ascii_only?.should be_false
+    end
+
+    it "returns false if the String contains any non-ASCII characters" do
+      [ ["\u{6666}", false],
+        ["hello, \u{6666}", false],
+        ["\u{6666}".encode('UTF-8'), false],
+        ["\u{6666}".dup.force_encoding('UTF-8'), false],
+      ].should be_computed_by(:ascii_only?)
+    end
+
+    it "returns false if the encoding is US-ASCII" do
+      [ ["\u{6666}".dup.force_encoding(Encoding::US_ASCII), false],
+        ["hello, \u{6666}".dup.force_encoding(Encoding::US_ASCII), false],
+      ].should be_computed_by(:ascii_only?)
+    end
+  end
+
+  it "returns true for the empty String with an ASCII-compatible encoding" do
+    "".ascii_only?.should be_true
+    "".encode('UTF-8').ascii_only?.should be_true
+  end
+
+  it "returns false for the empty String with a non-ASCII-compatible encoding" do
+    "".dup.force_encoding('UTF-16LE').ascii_only?.should be_false
+    "".encode('UTF-16BE').ascii_only?.should be_false
+  end
+
+  it "returns false for a non-empty String with non-ASCII-compatible encoding" do
+    "\x78\x00".dup.force_encoding("UTF-16LE").ascii_only?.should be_false
+  end
+
+  it "returns false when interpolating non ascii strings" do
+    base = "EU currency is".dup.force_encoding(Encoding::US_ASCII)
+    euro = "\u20AC"
+    interp = "#{base} #{euro}"
+    euro.ascii_only?.should be_false
+    base.ascii_only?.should be_true
+    interp.ascii_only?.should be_false
+  end
+
+  it "returns false after appending non ASCII characters to an empty String" do
+    ("".dup << "λ").ascii_only?.should be_false
+  end
+
+  it "returns false when concatenating an ASCII and non-ASCII String" do
+    "".dup.concat("λ").ascii_only?.should be_false
+  end
+
+  it "returns false when replacing an ASCII String with a non-ASCII String" do
+    "".dup.replace("λ").ascii_only?.should be_false
+  end
+end
diff --git a/spec/ruby/core/string/b_spec.rb b/spec/ruby/core/string/b_spec.rb
new file mode 100644
index 0000000000..4b1fafff11
--- /dev/null
+++ b/spec/ruby/core/string/b_spec.rb
@@ -0,0 +1,16 @@
+# -*- encoding: utf-8 -*-
+# frozen_string_literal: false
+require_relative '../../spec_helper'
+
+describe "String#b" do
+  it "returns a binary encoded string" do
+    "Hello".b.should == "Hello".force_encoding(Encoding::BINARY)
+    "こんちには".b.should == "こんちには".force_encoding(Encoding::BINARY)
+  end
+
+  it "returns new string without modifying self" do
+    str = "こんちには"
+    str.b.should_not equal(str)
+    str.should == "こんちには"
+  end
+end
diff --git a/spec/ruby/core/string/byteindex_spec.rb b/spec/ruby/core/string/byteindex_spec.rb
new file mode 100644
index
0000000000..d420f3f683
--- /dev/null
+++ b/spec/ruby/core/string/byteindex_spec.rb
@@ -0,0 +1,298 @@
+# -*- encoding: utf-8 -*-
+require_relative '../../spec_helper'
+require_relative 'fixtures/classes'
+require_relative 'shared/byte_index_common.rb'
+
+describe "String#byteindex" do
+  it "calls #to_str to convert the first argument" do
+    char = mock("string index char")
+    char.should_receive(:to_str).and_return("b")
+    "abc".byteindex(char).should == 1
+  end
+
+  it "calls #to_int to convert the second argument" do
+    offset = mock("string index offset")
+    offset.should_receive(:to_int).and_return(1)
+    "abc".byteindex("c", offset).should == 2
+  end
+
+  it "does not raise IndexError when byte offset is correct or on string boundary" do
+    "わ".byteindex("").should == 0
+    "わ".byteindex("", 0).should == 0
+    "わ".byteindex("", 3).should == 3
+  end
+
+  it_behaves_like :byte_index_common, :byteindex
+end
+
+describe "String#byteindex with String" do
+  it "behaves the same as String#byteindex(char) for one-character strings" do
+    "blablabla hello cruel world...!".split("").uniq.each do |str|
+      chr = str[0]
+      str.byteindex(str).should == str.byteindex(chr)
+
+      0.upto(str.size + 1) do |start|
+        str.byteindex(str, start).should == str.byteindex(chr, start)
+      end
+
+      (-str.size - 1).upto(-1) do |start|
+        str.byteindex(str, start).should == str.byteindex(chr, start)
+      end
+    end
+  end
+
+  it "returns the byteindex of the first occurrence of the given substring" do
+    "blablabla".byteindex("").should == 0
+    "blablabla".byteindex("b").should == 0
+    "blablabla".byteindex("bla").should == 0
+    "blablabla".byteindex("blabla").should == 0
+    "blablabla".byteindex("blablabla").should == 0
+
+    "blablabla".byteindex("l").should == 1
+    "blablabla".byteindex("la").should == 1
+    "blablabla".byteindex("labla").should == 1
+    "blablabla".byteindex("lablabla").should == 1
+
+    "blablabla".byteindex("a").should == 2
+    "blablabla".byteindex("abla").should == 2
+    "blablabla".byteindex("ablabla").should == 2
+  end
+
+  it "treats the offset as a byteindex" do
+    "aaaaa".byteindex("a", 0).should == 0
+    "aaaaa".byteindex("a", 2).should == 2
+    "aaaaa".byteindex("a", 4).should == 4
+  end
+
+  it "ignores string subclasses" do
+    "blablabla".byteindex(StringSpecs::MyString.new("bla")).should == 0
+    StringSpecs::MyString.new("blablabla").byteindex("bla").should == 0
+    StringSpecs::MyString.new("blablabla").byteindex(StringSpecs::MyString.new("bla")).should == 0
+  end
+
+  it "starts the search at the given offset" do
+    "blablabla".byteindex("bl", 0).should == 0
+    "blablabla".byteindex("bl", 1).should == 3
+    "blablabla".byteindex("bl", 2).should == 3
+    "blablabla".byteindex("bl", 3).should == 3
+
+    "blablabla".byteindex("bla", 0).should == 0
+    "blablabla".byteindex("bla", 1).should == 3
+    "blablabla".byteindex("bla", 2).should == 3
+    "blablabla".byteindex("bla", 3).should == 3
+
+    "blablabla".byteindex("blab", 0).should == 0
+    "blablabla".byteindex("blab", 1).should == 3
+    "blablabla".byteindex("blab", 2).should == 3
+    "blablabla".byteindex("blab", 3).should == 3
+
+    "blablabla".byteindex("la", 1).should == 1
+    "blablabla".byteindex("la", 2).should == 4
+    "blablabla".byteindex("la", 3).should == 4
+    "blablabla".byteindex("la", 4).should == 4
+
+    "blablabla".byteindex("lab", 1).should == 1
+    "blablabla".byteindex("lab", 2).should == 4
+    "blablabla".byteindex("lab", 3).should == 4
+    "blablabla".byteindex("lab", 4).should == 4
+
+    "blablabla".byteindex("ab", 2).should == 2
+    "blablabla".byteindex("ab", 3).should == 5
+    "blablabla".byteindex("ab", 4).should == 5
+    "blablabla".byteindex("ab", 5).should == 5
+
+    "blablabla".byteindex("", 0).should == 0
+    "blablabla".byteindex("", 1).should == 1
+    "blablabla".byteindex("", 2).should == 2
+    "blablabla".byteindex("", 7).should == 7
+    "blablabla".byteindex("", 8).should == 8
+    "blablabla".byteindex("", 9).should == 9
+  end
+
+  it "starts the search at offset + self.length if offset is negative" do
+    str = "blablabla"
+
+    ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle|
+      (-str.length .. -1).each do |offset|
+        str.byteindex(needle, offset).should ==
+        str.byteindex(needle, offset + str.length)
+      end
+    end
+  end
+
+  it "returns nil if the substring isn't found" do
+    "blablabla".byteindex("B").should == nil
+    "blablabla".byteindex("z").should == nil
+    "blablabla".byteindex("BLA").should == nil
+    "blablabla".byteindex("blablablabla").should == nil
+    "blablabla".byteindex("", 10).should == nil
+
+    "hello".byteindex("he", 1).should == nil
+    "hello".byteindex("he", 2).should == nil
+    "I’ve got a multibyte character.\n".byteindex("\n\n").should == nil
+  end
+
+  it "returns the character byteindex of a multibyte character" do
+    "ありがとう".byteindex("が").should == 6
+  end
+
+  it "returns the character byteindex after offset" do
+    "われわれ".byteindex("わ", 3).should == 6
+    "ありがとうありがとう".byteindex("が", 9).should == 21
+  end
+
+  it "returns the character byteindex after a partial first match" do
+    "</</h".byteindex("</h").should == 2
+  end
+
+  it "raises an Encoding::CompatibilityError if the encodings are incompatible" do
+    char = "れ".encode Encoding::EUC_JP
+    -> do
+      "あれ".byteindex(char)
+    end.should raise_error(Encoding::CompatibilityError)
+  end
+
+  it "handles a substring in a superset encoding" do
+    'abc'.dup.force_encoding(Encoding::US_ASCII).byteindex('é').should == nil
+  end
+
+  it "handles a substring in a subset encoding" do
+    'été'.byteindex('t'.dup.force_encoding(Encoding::US_ASCII)).should == 2
+  end
+end
+
+describe "String#byteindex with Regexp" do
+  it "behaves the same as String#byteindex(string) for escaped string regexps" do
+    ["blablabla", "hello cruel world...!"].each do |str|
+      ["", "b", "bla", "lab", "o c", "d."].each do |needle|
+        regexp = Regexp.new(Regexp.escape(needle))
+        str.byteindex(regexp).should == str.byteindex(needle)
+
+        0.upto(str.size + 1) do |start|
+          str.byteindex(regexp, start).should == str.byteindex(needle, start)
+        end
+
+        (-str.size - 1).upto(-1) do |start|
+          str.byteindex(regexp, start).should == str.byteindex(needle, start)
+        end
+      end
+    end
+  end
+
+  it "returns the byteindex of the first match of regexp" do
+    "blablabla".byteindex(/bla/).should == 0
+    "blablabla".byteindex(/BLA/i).should == 0
+
+    "blablabla".byteindex(/.{0}/).should == 0
+    "blablabla".byteindex(/.{6}/).should == 0
+    "blablabla".byteindex(/.{9}/).should == 0
+
+    "blablabla".byteindex(/.*/).should == 0
+    "blablabla".byteindex(/.+/).should == 0
+
+    "blablabla".byteindex(/lab|b/).should == 0
+
+    not_supported_on :opal do
+      "blablabla".byteindex(/\A/).should == 0
+      "blablabla".byteindex(/\Z/).should == 9
+      "blablabla".byteindex(/\z/).should == 9
+      "blablabla\n".byteindex(/\Z/).should == 9
+      "blablabla\n".byteindex(/\z/).should == 10
+    end
+
+    "blablabla".byteindex(/^/).should == 0
+    "\nblablabla".byteindex(/^/).should == 0
+    "b\nablabla".byteindex(/$/).should == 1
+    "bl\nablabla".byteindex(/$/).should == 2
+
+    "blablabla".byteindex(/.l./).should == 0
+  end
+
+  it "starts the search at the given offset" do
+    "blablabla".byteindex(/.{0}/, 5).should == 5
+    "blablabla".byteindex(/.{1}/, 5).should == 5
+    "blablabla".byteindex(/.{2}/, 5).should == 5
+    "blablabla".byteindex(/.{3}/, 5).should == 5
+    "blablabla".byteindex(/.{4}/, 5).should == 5
+
+    "blablabla".byteindex(/.{0}/, 3).should == 3
+    "blablabla".byteindex(/.{1}/, 3).should == 3
+    "blablabla".byteindex(/.{2}/, 3).should == 3
+    "blablabla".byteindex(/.{5}/, 3).should == 3
+    "blablabla".byteindex(/.{6}/, 3).should == 3
+
+    "blablabla".byteindex(/.l./, 0).should == 0
+    "blablabla".byteindex(/.l./, 1).should == 3
+    "blablabla".byteindex(/.l./, 2).should == 3
+    "blablabla".byteindex(/.l./, 3).should == 3
+
+    "xblaxbla".byteindex(/x./, 0).should == 0
+    "xblaxbla".byteindex(/x./, 1).should == 4
+    "xblaxbla".byteindex(/x./, 2).should == 4
+
+    not_supported_on :opal do
+      "blablabla\n".byteindex(/\Z/, 9).should == 9
+    end
+  end
+
+  it "starts the search at offset + self.length if offset is negative" do
+    str = "blablabla"
+
+    [/bl/, /bla/, /blab/, /la/, /lab/, /ab/, //].each do |regexp| # Regexp needles, so the Regexp code path is what gets exercised
+      (-str.length .. -1).each do |offset|
+        str.byteindex(regexp, offset).should ==
+        str.byteindex(regexp, offset + str.length)
+      end
+    end
+  end
+
+  it "returns nil if the substring isn't found" do
+    "blablabla".byteindex(/BLA/).should == nil
+
+    "blablabla".byteindex(/.{10}/).should == nil
+    "blaxbla".byteindex(/.x/, 3).should == nil
+    "blaxbla".byteindex(/..x/, 2).should == nil
+  end
+
+  it "returns nil if the Regexp matches the empty string and the offset is out of range" do
+    "ruby".byteindex(//, 12).should be_nil
+  end
+
+  it "supports \\G which matches at the given start offset" do
+    "helloYOU.".byteindex(/\GYOU/, 5).should == 5
+    "helloYOU.".byteindex(/\GYOU/).should == nil
+
+    re = /\G.+YOU/
+    # The # marks where \G will match.
+    [
+      ["#hi!YOUall.", 0],
+      ["h#i!YOUall.", 1],
+      ["hi#!YOUall.", 2],
+      ["hi!#YOUall.", nil]
+    ].each do |spec|
+
+      start = spec[0].byteindex("#")
+      str = spec[0].delete("#")
+
+      str.byteindex(re, start).should == spec[1]
+    end
+  end
+
+  it "converts start_offset to an integer via to_int" do
+    obj = mock('1')
+    obj.should_receive(:to_int).and_return(1)
+    "RWOARW".byteindex(/R./, obj).should == 4
+  end
+
+  it "returns the character byteindex of a multibyte character" do
+    "ありがとう".byteindex(/が/).should == 6
+  end
+
+  it "returns the character byteindex after offset" do
+    "われわれ".byteindex(/わ/, 3).should == 6
+  end
+
+  it "treats the offset as a byteindex" do
+    "われわわれ".byteindex(/わ/, 6).should == 6
+  end
+end
diff --git a/spec/ruby/core/string/byterindex_spec.rb b/spec/ruby/core/string/byterindex_spec.rb
new file mode 100644
index 0000000000..983222e35d
--- /dev/null
+++ b/spec/ruby/core/string/byterindex_spec.rb
@@ -0,0 +1,353 @@
+# -*- encoding: utf-8 -*-
+require_relative '../../spec_helper'
+require_relative 'fixtures/classes'
+require_relative 'shared/byte_index_common.rb'
+
+describe "String#byterindex with object" do
+  it "tries to convert obj to a string via to_str" do
+    obj = mock('lo')
+    def obj.to_str() "lo" end
+    "hello".byterindex(obj).should == "hello".byterindex("lo")
+
+    obj = mock('o')
+    def obj.respond_to?(arg, *) true end
+    def obj.method_missing(*args) "o" end
+    "hello".byterindex(obj).should == "hello".byterindex("o")
+  end
+
+  it "calls #to_int to convert the second argument" do
+    offset = mock("string index offset")
+    offset.should_receive(:to_int).and_return(3)
+    "abc".byterindex("c", offset).should == 2
+  end
+
+  it "does not raise IndexError when byte offset is correct or on string boundary" do
+    "わ".byterindex("", 0).should == 0
+    "わ".byterindex("", 3).should == 3
+    "わ".byterindex("").should == 3
+  end
+
+  it_behaves_like :byte_index_common, :byterindex
+end
+
+describe "String#byterindex with String" do
+  it "behaves the same as String#byterindex(char) for one-character strings" do
+    "blablabla hello cruel world...!".split("").uniq.each do |str|
+      chr = str[0]
+      str.byterindex(str).should == str.byterindex(chr)
+
+      0.upto(str.size + 1) do |start|
+        str.byterindex(str, start).should == str.byterindex(chr, start)
+      end
+
+      (-str.size - 1).upto(-1) do |start|
+        str.byterindex(str, start).should == str.byterindex(chr, start)
+      end
+    end
+  end
+
+  it "behaves the same as String#byterindex(?char) for one-character strings" do
+    "blablabla hello cruel world...!".split("").uniq.each do |str|
+      chr = str[0] =~ / / ? str[0] : eval("?#{str[0]}")
+      str.byterindex(str).should == str.byterindex(chr)
+
+      0.upto(str.size + 1) do |start|
+        str.byterindex(str, start).should == str.byterindex(chr, start)
+      end
+
+      (-str.size - 1).upto(-1) do |start|
+        str.byterindex(str, start).should == str.byterindex(chr, start)
+      end
+    end
+  end
+
+  it "returns the index of the last occurrence of the given substring" do
+    "blablabla".byterindex("").should == 9
+    "blablabla".byterindex("a").should == 8
+    "blablabla".byterindex("la").should == 7
+    "blablabla".byterindex("bla").should == 6
+    "blablabla".byterindex("abla").should == 5
+    "blablabla".byterindex("labla").should == 4
+    "blablabla".byterindex("blabla").should == 3
+    "blablabla".byterindex("ablabla").should == 2
+    "blablabla".byterindex("lablabla").should == 1
+    "blablabla".byterindex("blablabla").should == 0
+
+    "blablabla".byterindex("l").should == 7
+    "blablabla".byterindex("bl").should == 6
+    "blablabla".byterindex("abl").should == 5
+    "blablabla".byterindex("labl").should == 4
+    "blablabla".byterindex("blabl").should == 3
+    "blablabla".byterindex("ablabl").should == 2
+    "blablabla".byterindex("lablabl").should == 1
+    "blablabla".byterindex("blablabl").should == 0
+
+    "blablabla".byterindex("b").should == 6
+    "blablabla".byterindex("ab").should == 5
+    "blablabla".byterindex("lab").should == 4
+    "blablabla".byterindex("blab").should == 3
+    "blablabla".byterindex("ablab").should == 2
+    "blablabla".byterindex("lablab").should == 1
+    "blablabla".byterindex("blablab").should == 0
+  end
+
+  it "ignores string subclasses" do
+    "blablabla".byterindex(StringSpecs::MyString.new("bla")).should == 6
+    StringSpecs::MyString.new("blablabla").byterindex("bla").should == 6
+    StringSpecs::MyString.new("blablabla").byterindex(StringSpecs::MyString.new("bla")).should == 6
+  end
+
+  it "starts the search at the given offset" do
+    "blablabla".byterindex("bl", 0).should == 0
+    "blablabla".byterindex("bl", 1).should == 0
+    "blablabla".byterindex("bl", 2).should == 0
+    "blablabla".byterindex("bl", 3).should == 3
+
+    "blablabla".byterindex("bla", 0).should == 0
+    "blablabla".byterindex("bla", 1).should == 0
+    "blablabla".byterindex("bla", 2).should == 0
+    "blablabla".byterindex("bla", 3).should == 3
+
+    "blablabla".byterindex("blab", 0).should == 0
+    "blablabla".byterindex("blab", 1).should == 0
+    "blablabla".byterindex("blab", 2).should == 0
+    "blablabla".byterindex("blab", 3).should == 3
+    "blablabla".byterindex("blab", 6).should == 3
+    "blablablax".byterindex("blab", 6).should == 3
+
+    "blablabla".byterindex("la", 1).should == 1
+    "blablabla".byterindex("la", 2).should == 1
+    "blablabla".byterindex("la", 3).should == 1
+    "blablabla".byterindex("la", 4).should == 4
+
+    "blablabla".byterindex("lab", 1).should == 1
+    "blablabla".byterindex("lab", 2).should == 1
+    "blablabla".byterindex("lab", 3).should == 1
+    "blablabla".byterindex("lab", 4).should == 4
+
+    "blablabla".byterindex("ab", 2).should == 2
+    "blablabla".byterindex("ab", 3).should == 2
+    "blablabla".byterindex("ab", 4).should == 2
+    "blablabla".byterindex("ab", 5).should == 5
+
+    "blablabla".byterindex("", 0).should == 0
+    "blablabla".byterindex("", 1).should == 1
+    "blablabla".byterindex("", 2).should == 2
+    "blablabla".byterindex("", 7).should == 7
+    "blablabla".byterindex("", 8).should == 8
+    "blablabla".byterindex("", 9).should == 9
+    "blablabla".byterindex("", 10).should == 9
+  end
+
+  it "starts the search at offset + self.length if offset is negative" do
+    str = "blablabla"
+
+    ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle|
+      (-str.length .. -1).each do |offset|
+        str.byterindex(needle, offset).should ==
+        str.byterindex(needle, offset + str.length)
+      end
+    end
+  end
+
+  it "returns nil if the substring isn't found" do
+    "blablabla".byterindex("B").should == nil
+    "blablabla".byterindex("z").should == nil
+    "blablabla".byterindex("BLA").should == nil
+    "blablabla".byterindex("blablablabla").should == nil
+
+    "hello".byterindex("lo", 0).should == nil
+    "hello".byterindex("lo", 1).should == nil
+    "hello".byterindex("lo", 2).should == nil
+
+    "hello".byterindex("llo", 0).should == nil
+    "hello".byterindex("llo", 1).should == nil
+
+    "hello".byterindex("el", 0).should == nil
+    "hello".byterindex("ello", 0).should == nil
+
+    "hello".byterindex("", -6).should == nil
+    "hello".byterindex("", -7).should == nil
+
+    "hello".byterindex("h", -6).should == nil
+  end
+
+  it "tries to convert start_offset to an integer via to_int" do
+    obj = mock('5')
+    def obj.to_int() 5 end
+    "str".byterindex("st", obj).should == 0
+
+    obj = mock('5')
+    def obj.respond_to?(arg, *) true end
+    def obj.method_missing(*args) 5 end
+    "str".byterindex("st", obj).should == 0
+  end
+
+  it "raises a TypeError when given offset is nil" do
+    -> { "str".byterindex("st", nil) }.should raise_error(TypeError)
+  end
+
+  it "handles a substring in a superset encoding" do
+    'abc'.dup.force_encoding(Encoding::US_ASCII).byterindex('é').should == nil
+  end
+
+  it "handles a substring in a subset encoding" do
+    'été'.byterindex('t'.dup.force_encoding(Encoding::US_ASCII)).should == 2
+  end
+end
+
+describe "String#byterindex with Regexp" do
+  it "behaves the same as String#byterindex(string) for escaped string regexps" do
+    ["blablabla", "hello cruel world...!"].each do |str|
+      ["", "b", "bla", "lab", "o c", "d."].each do |needle|
+        regexp = Regexp.new(Regexp.escape(needle))
+        str.byterindex(regexp).should == str.byterindex(needle)
+
+        0.upto(str.size + 1) do |start|
+          str.byterindex(regexp, start).should == str.byterindex(needle, start)
+        end
+
+        (-str.size - 1).upto(-1) do |start|
+          str.byterindex(regexp, start).should == str.byterindex(needle, start)
+        end
+      end
+    end
+  end
+
+  it "returns the index of the first match from the end of string of regexp" do
+    "blablabla".byterindex(/bla/).should == 6
+    "blablabla".byterindex(/BLA/i).should == 6
+
+    "blablabla".byterindex(/.{0}/).should == 9
+    "blablabla".byterindex(/.{1}/).should == 8
+    "blablabla".byterindex(/.{2}/).should == 7
+    "blablabla".byterindex(/.{6}/).should == 3
+    "blablabla".byterindex(/.{9}/).should == 0
+
+    "blablabla".byterindex(/.*/).should == 9
+    "blablabla".byterindex(/.+/).should == 8
+
+    "blablabla".byterindex(/bla|a/).should == 8
+
+    not_supported_on :opal do
+      "blablabla".byterindex(/\A/).should == 0
+      "blablabla".byterindex(/\Z/).should == 9
+      "blablabla".byterindex(/\z/).should == 9
+      "blablabla\n".byterindex(/\Z/).should == 10
+      "blablabla\n".byterindex(/\z/).should == 10
+    end
+
+    "blablabla".byterindex(/^/).should == 0
+    not_supported_on :opal do
+      "\nblablabla".byterindex(/^/).should == 1
+      "b\nlablabla".byterindex(/^/).should == 2
+    end
+    "blablabla".byterindex(/$/).should == 9
+
+    "blablabla".byterindex(/.l./).should == 6
+  end
+
+  it "starts the search at the given offset" do
+    "blablabla".byterindex(/.{0}/, 5).should == 5
+    "blablabla".byterindex(/.{1}/, 5).should == 5
+    "blablabla".byterindex(/.{2}/, 5).should == 5
+    "blablabla".byterindex(/.{3}/, 5).should == 5
+    "blablabla".byterindex(/.{4}/, 5).should == 5
+
+    "blablabla".byterindex(/.{0}/, 3).should == 3
+    "blablabla".byterindex(/.{1}/, 3).should == 3
+    "blablabla".byterindex(/.{2}/, 3).should == 3
+    "blablabla".byterindex(/.{5}/, 3).should == 3
+    "blablabla".byterindex(/.{6}/, 3).should == 3
+
+    "blablabla".byterindex(/.l./, 0).should == 0
+    "blablabla".byterindex(/.l./, 1).should == 0
+    "blablabla".byterindex(/.l./, 2).should == 0
+    "blablabla".byterindex(/.l./, 3).should == 3
+
+    "blablablax".byterindex(/.x/, 10).should == 8
+    "blablablax".byterindex(/.x/, 9).should == 8
+    "blablablax".byterindex(/.x/, 8).should == 8
+
+    "blablablax".byterindex(/..x/, 10).should == 7
+    "blablablax".byterindex(/..x/, 9).should == 7
+    "blablablax".byterindex(/..x/, 8).should == 7
+    "blablablax".byterindex(/..x/, 7).should == 7
+
+    not_supported_on :opal do
+      "blablabla\n".byterindex(/\Z/, 9).should == 9
+    end
+  end
+
+  it "starts the search at offset + self.length if offset is negative" do
+    str = "blablabla"
+
+    [/bl/, /bla/, /blab/, /la/, /lab/, /ab/, //].each do |regexp| # Regexp needles, so the Regexp code path is what gets exercised
+      (-str.length .. -1).each do |offset|
+        str.byterindex(regexp, offset).should ==
+        str.byterindex(regexp, offset + str.length)
+      end
+    end
+  end
+
+  it "returns nil if the substring isn't found" do
+    "blablabla".byterindex(/BLA/).should == nil
+    "blablabla".byterindex(/.{10}/).should == nil
+    "blablablax".byterindex(/.x/, 7).should == nil
+    "blablablax".byterindex(/..x/, 6).should == nil
+
+    not_supported_on :opal do
+      "blablabla".byterindex(/\Z/, 5).should == nil
+      "blablabla".byterindex(/\z/, 5).should == nil
+      "blablabla\n".byterindex(/\z/, 9).should == nil
+    end
+  end
+
+  not_supported_on :opal do
+    it "supports \\G which matches at the given start offset" do
+      "helloYOU.".byterindex(/YOU\G/, 8).should == 5
+      "helloYOU.".byterindex(/YOU\G/).should == nil
+
+      idx = "helloYOUall!".byteindex("YOU") # expected result is a byte offset
+      re = /YOU.+\G.+/
+      # The # marks where \G will match.
+      [
+        ["helloYOU#all.", nil],
+        ["helloYOUa#ll.", idx],
+        ["helloYOUal#l.", idx],
+        ["helloYOUall#.", idx],
+        ["helloYOUall.#", nil]
+      ].each do |i|
+        start = i[0].byteindex("#") # byterindex takes a byte offset, not a character index
+        str = i[0].delete("#")
+
+        str.byterindex(re, start).should == i[1]
+      end
+    end
+  end
+
+  it "tries to convert start_offset to an integer" do
+    obj = mock('5')
+    def obj.to_int() 5 end
+    "str".byterindex(/../, obj).should == 1
+
+    obj = mock('5')
+    def obj.respond_to?(arg, *) true end
+    def obj.method_missing(*args); 5; end
+    "str".byterindex(/../, obj).should == 1
+  end
+
+  it "raises a TypeError when given offset is nil" do
+    -> { "str".byterindex(/../, nil) }.should raise_error(TypeError)
+  end
+
+  it "returns the reverse byte index of a multibyte character" do
+    "ありがりがとう".byterindex("が").should == 12
+    "ありがりがとう".byterindex(/が/).should == 12
+  end
+
+  it "returns the character index before the finish" do
+    "ありがりがとう".byterindex("が", 9).should == 6
+    "ありがりがとう".byterindex(/が/, 9).should == 6
+  end
+end
diff --git a/spec/ruby/core/string/bytes_spec.rb b/spec/ruby/core/string/bytes_spec.rb
new file mode 100644
index 0000000000..02151eebbc
--- /dev/null
+++ b/spec/ruby/core/string/bytes_spec.rb
@@ -0,0 +1,55 @@
+# -*- encoding: utf-8 -*-
+require_relative '../../spec_helper'
+
+describe "String#bytes" do
+  before :each do
+    @utf8 = "東京"
+    @ascii = 'Tokyo'
+    @utf8_ascii = @utf8 + @ascii
+  end
+
+  it "returns an Array when no block is given" do
+    @utf8.bytes.should be_an_instance_of(Array)
+  end
+
+  it "yields each byte to a block if one is given, returning self" do
+    bytes = []
+    @utf8.bytes {|b| bytes << b}.should == @utf8
+    bytes.should == @utf8.bytes.to_a
+  end
+
+  it "returns #bytesize bytes" do
+    @utf8_ascii.bytes.to_a.size.should == @utf8_ascii.bytesize
+  end
+
+  it "returns bytes as Integers" do
+    @ascii.bytes.to_a.each {|b| b.should be_an_instance_of(Integer)}
+    @utf8_ascii.bytes { |b| b.should be_an_instance_of(Integer) }
+  end
+
+  it "agrees with #unpack('C*')" do
+    @utf8_ascii.bytes.to_a.should == @utf8_ascii.unpack("C*")
+  end
+
+  it "yields/returns no bytes for the empty string" do
+    ''.bytes.to_a.should == []
+  end
+end
+
+describe "String#bytes" do
+  before :each do
+    @utf8 = "東京"
+    @ascii = 'Tokyo'
+    @utf8_ascii = @utf8 + @ascii
+  end
+
+  it "agrees with #getbyte" do
+    @utf8_ascii.bytes.to_a.each_with_index do |byte,index|
+      byte.should == @utf8_ascii.getbyte(index)
+    end
+  end
+
+  it "is unaffected by #force_encoding" do
+    @utf8.dup.force_encoding('ASCII').bytes.to_a.should == @utf8.bytes.to_a
+  end
+end
diff --git a/spec/ruby/core/string/bytesize_spec.rb b/spec/ruby/core/string/bytesize_spec.rb
new file mode 100644
index 0000000000..2bbefc0820
--- /dev/null
+++ b/spec/ruby/core/string/bytesize_spec.rb
@@ -0,0 +1,33 @@
+# -*- encoding: utf-8 -*-
+require_relative '../../spec_helper'
+require_relative 'fixtures/classes'
+
+describe "String#bytesize" do
+  it "returns the length of self in bytes" do
+    "hello".bytesize.should == 5
+    " ".bytesize.should == 1
+  end
+
+  it "works with strings containing single UTF-8 characters" do
+    "\u{6666}".bytesize.should == 3
+  end
+
+  it "works with pseudo-ASCII strings containing single UTF-8 characters" do
+    "\u{6666}".dup.force_encoding('ASCII').bytesize.should == 3
+  end
+
+  it "works with strings containing UTF-8 characters" do
+    "c \u{6666}".dup.force_encoding('UTF-8').bytesize.should == 5
+    "c \u{6666}".bytesize.should == 5
+  end
+
+  it "works with pseudo-ASCII strings containing UTF-8 characters" do
+    "c \u{6666}".dup.force_encoding('ASCII').bytesize.should == 5
+  end
+
+  it "returns 0 for the empty string" do
+    "".bytesize.should == 0
+    "".dup.force_encoding('ASCII').bytesize.should == 0
+    "".dup.force_encoding('UTF-8').bytesize.should == 0
+  end
+end
diff --git a/spec/ruby/core/string/byteslice_spec.rb b/spec/ruby/core/string/byteslice_spec.rb
new file mode 100644
index 0000000000..4ad9e8d8f1
--- /dev/null
+++ b/spec/ruby/core/string/byteslice_spec.rb
@@ -0,0 +1,33 @@
+#
encoding: binary +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/slice' + +describe "String#byteslice" do + it "needs to reviewed for spec completeness" + + it_behaves_like :string_slice, :byteslice +end + +describe "String#byteslice with index, length" do + it_behaves_like :string_slice_index_length, :byteslice +end + +describe "String#byteslice with Range" do + it_behaves_like :string_slice_range, :byteslice +end + +describe "String#byteslice on non ASCII strings" do + it "returns byteslice of unicode strings" do + "\u3042".byteslice(1).should == "\x81".dup.force_encoding("UTF-8") + "\u3042".byteslice(1, 2).should == "\x81\x82".dup.force_encoding("UTF-8") + "\u3042".byteslice(1..2).should == "\x81\x82".dup.force_encoding("UTF-8") + "\u3042".byteslice(-1).should == "\x82".dup.force_encoding("UTF-8") + end + + it "returns a String in the same encoding as self" do + "ruby".encode("UTF-8").slice(0).encoding.should == Encoding::UTF_8 + "ruby".encode("US-ASCII").slice(0).encoding.should == Encoding::US_ASCII + "ruby".encode("Windows-1251").slice(0).encoding.should == Encoding::Windows_1251 + end +end diff --git a/spec/ruby/core/string/bytesplice_spec.rb b/spec/ruby/core/string/bytesplice_spec.rb new file mode 100644 index 0000000000..2c770e340a --- /dev/null +++ b/spec/ruby/core/string/bytesplice_spec.rb @@ -0,0 +1,294 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' + +describe "String#bytesplice" do + it "raises IndexError when index is less than -bytesize" do + -> { "hello".bytesplice(-6, 0, "xxx") }.should raise_error(IndexError, "index -6 out of string") + end + + it "raises IndexError when index is greater than bytesize" do + -> { "hello".bytesplice(6, 0, "xxx") }.should raise_error(IndexError, "index 6 out of string") + end + + it "raises IndexError for negative length" do + -> { "abc".bytesplice(0, -2, "") }.should raise_error(IndexError, "negative length 
-2") + end + + it "replaces with integer indices" do + "hello".bytesplice(-5, 0, "xxx").should == "xxxhello" + "hello".bytesplice(0, 0, "xxx").should == "xxxhello" + "hello".bytesplice(0, 1, "xxx").should == "xxxello" + "hello".bytesplice(0, 5, "xxx").should == "xxx" + "hello".bytesplice(0, 6, "xxx").should == "xxx" + end + + it "raises RangeError when range left boundary is less than -bytesize" do + -> { "hello".bytesplice(-6...-6, "xxx") }.should raise_error(RangeError, "-6...-6 out of range") + end + + it "replaces with ranges" do + "hello".bytesplice(-5...-5, "xxx").should == "xxxhello" + "hello".bytesplice(0...0, "xxx").should == "xxxhello" + "hello".bytesplice(0..0, "xxx").should == "xxxello" + "hello".bytesplice(0...1, "xxx").should == "xxxello" + "hello".bytesplice(0..1, "xxx").should == "xxxllo" + "hello".bytesplice(0..-1, "xxx").should == "xxx" + "hello".bytesplice(0...5, "xxx").should == "xxx" + "hello".bytesplice(0...6, "xxx").should == "xxx" + end + + it "raises TypeError when integer index is provided without length argument" do + -> { "hello".bytesplice(0, "xxx") }.should raise_error(TypeError, "wrong argument type Integer (expected Range)") + end + + it "replaces on an empty string" do + "".bytesplice(0, 0, "").should == "" + "".bytesplice(0, 0, "xxx").should == "xxx" + end + + it "mutates self" do + s = "hello" + s.bytesplice(2, 1, "xxx").should.equal?(s) + end + + it "raises when string is frozen" do + s = "hello".freeze + -> { s.bytesplice(2, 1, "xxx") }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") + end + + ruby_version_is "3.3" do + it "raises IndexError when str_index is less than -bytesize" do + -> { "hello".bytesplice(2, 1, "HELLO", -6, 0) }.should raise_error(IndexError, "index -6 out of string") + end + + it "raises IndexError when str_index is greater than bytesize" do + -> { "hello".bytesplice(2, 1, "HELLO", 6, 0) }.should raise_error(IndexError, "index 6 out of string") + end + + it "raises IndexError for 
negative str length" do + -> { "abc".bytesplice(0, 1, "", 0, -2) }.should raise_error(IndexError, "negative length -2") + end + + it "replaces with integer str indices" do + "hello".bytesplice(1, 2, "HELLO", -5, 0).should == "hlo" + "hello".bytesplice(1, 2, "HELLO", 0, 0).should == "hlo" + "hello".bytesplice(1, 2, "HELLO", 0, 1).should == "hHlo" + "hello".bytesplice(1, 2, "HELLO", 0, 5).should == "hHELLOlo" + "hello".bytesplice(1, 2, "HELLO", 0, 6).should == "hHELLOlo" + end + + it "raises RangeError when str range left boundary is less than -bytesize" do + -> { "hello".bytesplice(0..1, "HELLO", -6...-6) }.should raise_error(RangeError, "-6...-6 out of range") + end + + it "replaces with str ranges" do + "hello".bytesplice(1..2, "HELLO", -5...-5).should == "hlo" + "hello".bytesplice(1..2, "HELLO", 0...0).should == "hlo" + "hello".bytesplice(1..2, "HELLO", 0..0).should == "hHlo" + "hello".bytesplice(1..2, "HELLO", 0...1).should == "hHlo" + "hello".bytesplice(1..2, "HELLO", 0..1).should == "hHElo" + "hello".bytesplice(1..2, "HELLO", 0..-1).should == "hHELLOlo" + "hello".bytesplice(1..2, "HELLO", 0...5).should == "hHELLOlo" + "hello".bytesplice(1..2, "HELLO", 0...6).should == "hHELLOlo" + end + + it "raises ArgumentError when integer str index is provided without str length argument" do + -> { "hello".bytesplice(0, 1, "xxx", 0) }.should raise_error(ArgumentError, "wrong number of arguments (given 4, expected 2, 3, or 5)") + end + + it "replaces on an empty string with str index/length" do + "".bytesplice(0, 0, "", 0, 0).should == "" + "".bytesplice(0, 0, "xxx", 0, 1).should == "x" + end + + it "mutates self with substring and str index/length" do + s = "hello" + s.bytesplice(2, 1, "xxx", 1, 2).should.equal?(s) + s.should.eql?("hexxlo") + end + + it "raises when string is frozen and str index/length" do + s = "hello".freeze + -> { s.bytesplice(2, 1, "xxx", 0, 1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") + end + + it "replaces on an 
empty string with str range" do + "".bytesplice(0..0, "", 0..0).should == "" + "".bytesplice(0..0, "xyz", 0..1).should == "xy" + end + + it "mutates self with substring and str range" do + s = "hello" + s.bytesplice(2..2, "xyz", 1..2).should.equal?(s) + s.should.eql?("heyzlo") + end + + it "raises when string is frozen and str range" do + s = "hello".freeze + -> { s.bytesplice(2..2, "yzx", 0..1) }.should raise_error(FrozenError, "can't modify frozen String: \"hello\"") + end + end +end + +describe "String#bytesplice with multibyte characters" do + it "raises IndexError when index is out of byte size boundary" do + -> { "こんにちは".bytesplice(-16, 0, "xxx") }.should raise_error(IndexError, "index -16 out of string") + end + + it "raises IndexError when index is not on a codepoint boundary" do + -> { "こんにちは".bytesplice(1, 0, "xxx") }.should raise_error(IndexError, "offset 1 does not land on character boundary") + end + + it "raises IndexError when length is not matching the codepoint boundary" do + -> { "こんにちは".bytesplice(0, 1, "xxx") }.should raise_error(IndexError, "offset 1 does not land on character boundary") + -> { "こんにちは".bytesplice(0, 2, "xxx") }.should raise_error(IndexError, "offset 2 does not land on character boundary") + end + + it "replaces with integer indices" do + "こんにちは".bytesplice(-15, 0, "xxx").should == "xxxこんにちは" + "こんにちは".bytesplice(0, 0, "xxx").should == "xxxこんにちは" + "こんにちは".bytesplice(0, 3, "xxx").should == "xxxんにちは" + "こんにちは".bytesplice(3, 3, "はは").should == "こははにちは" + "こんにちは".bytesplice(15, 0, "xxx").should == "こんにちはxxx" + end + + it "replaces with range" do + "こんにちは".bytesplice(-15...-16, "xxx").should == "xxxこんにちは" + "こんにちは".bytesplice(0...0, "xxx").should == "xxxこんにちは" + "こんにちは".bytesplice(0..2, "xxx").should == "xxxんにちは" + "こんにちは".bytesplice(0...3, "xxx").should == "xxxんにちは" + "こんにちは".bytesplice(0..5, "xxx").should == "xxxにちは" + "こんにちは".bytesplice(0..-1, "xxx").should == "xxx" + "こんにちは".bytesplice(0...15, "xxx").should == "xxx" + 
"こんにちは".bytesplice(0...18, "xxx").should == "xxx" + end + + it "treats negative length for range as 0" do + "こんにちは".bytesplice(0...-100, "xxx").should == "xxxこんにちは" + "こんにちは".bytesplice(3...-100, "xxx").should == "こxxxんにちは" + "こんにちは".bytesplice(-15...-100, "xxx").should == "xxxこんにちは" + end + + it "raises when ranges not match codepoint boundaries" do + -> { "こんにちは".bytesplice(0..0, "x") }.should raise_error(IndexError, "offset 1 does not land on character boundary") + -> { "こんにちは".bytesplice(0..1, "x") }.should raise_error(IndexError, "offset 2 does not land on character boundary") + # Begin is incorrect + -> { "こんにちは".bytesplice(-4..-1, "x") }.should raise_error(IndexError, "offset 11 does not land on character boundary") + -> { "こんにちは".bytesplice(-5..-1, "x") }.should raise_error(IndexError, "offset 10 does not land on character boundary") + # End is incorrect + -> { "こんにちは".bytesplice(-3..-2, "x") }.should raise_error(IndexError, "offset 14 does not land on character boundary") + -> { "こんにちは".bytesplice(-3..-3, "x") }.should raise_error(IndexError, "offset 13 does not land on character boundary") + end + + it "deals with a different encoded argument" do + s = "こんにちは" + s.encoding.should == Encoding::UTF_8 + sub = "xxxxxx" + sub.force_encoding(Encoding::US_ASCII) + + result = s.bytesplice(0, 3, sub) + result.should == "xxxxxxんにちは" + result.encoding.should == Encoding::UTF_8 + + s = "xxxxxx" + s.force_encoding(Encoding::US_ASCII) + sub = "こんにちは" + sub.encoding.should == Encoding::UTF_8 + + result = s.bytesplice(0, 3, sub) + result.should == "こんにちはxxx" + result.encoding.should == Encoding::UTF_8 + end + + ruby_version_is "3.3" do + it "raises IndexError when str_index is out of byte size boundary" do + -> { "こんにちは".bytesplice(3, 3, "こんにちは", -16, 0) }.should raise_error(IndexError, "index -16 out of string") + end + + it "raises IndexError when str_index is not on a codepoint boundary" do + -> { "こんにちは".bytesplice(3, 3, "こんにちは", 1, 0) }.should 
raise_error(IndexError, "offset 1 does not land on character boundary") + end + + it "raises IndexError when str_length is not matching the codepoint boundary" do + -> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 1) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + -> { "こんにちは".bytesplice(3, 3, "こんにちは", 0, 2) }.should raise_error(IndexError, "offset 2 does not land on character boundary") + end + + it "replaces with integer str indices" do + "こんにちは".bytesplice(3, 3, "こんにちは", -15, 0).should == "こにちは" + "こんにちは".bytesplice(3, 3, "こんにちは", 0, 0).should == "こにちは" + "こんにちは".bytesplice(3, 3, "こんにちは", 0, 3).should == "ここにちは" + "こんにちは".bytesplice(3, 3, "はは", 3, 3).should == "こはにちは" + "こんにちは".bytesplice(3, 3, "こんにちは", 15, 0).should == "こにちは" + end + + it "replaces with str range" do + "こんにちは".bytesplice(0..2, "こんにちは", -15...-16).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0...0).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3..5).should == "んんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3...6).should == "んんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3..8).should == "んにんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0..-1).should == "こんにちはんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0...15).should == "こんにちはんにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 0...18).should == "こんにちはんにちは" + end + + it "treats negative length for str range as 0" do + "こんにちは".bytesplice(0..2, "こんにちは", 0...-100).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", 3...-100).should == "んにちは" + "こんにちは".bytesplice(0..2, "こんにちは", -15...-100).should == "んにちは" + end + + it "raises when ranges not match codepoint boundaries in str" do + -> { "こんにちは".bytesplice(3...3, "こ", 0..0) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + -> { "こんにちは".bytesplice(3...3, "こ", 0..1) }.should raise_error(IndexError, "offset 2 does not land on character boundary") + # Begin is incorrect + -> { "こんにちは".bytesplice(3...3, "こんにちは", -4..-1) }.should raise_error(IndexError, 
"offset 11 does not land on character boundary") + -> { "こんにちは".bytesplice(3...3, "こんにちは", -5..-1) }.should raise_error(IndexError, "offset 10 does not land on character boundary") + # End is incorrect + -> { "こんにちは".bytesplice(3...3, "こんにちは", -3..-2) }.should raise_error(IndexError, "offset 14 does not land on character boundary") + -> { "こんにちは".bytesplice(3...3, "こんにちは", -3..-3) }.should raise_error(IndexError, "offset 13 does not land on character boundary") + end + + it "deals with a different encoded argument with str index/length" do + s = "こんにちは" + s.encoding.should == Encoding::UTF_8 + sub = "goodbye" + sub.force_encoding(Encoding::US_ASCII) + + result = s.bytesplice(3, 3, sub, 0, 3) + result.should == "こgooにちは" + result.encoding.should == Encoding::UTF_8 + + s = "hello" + s.force_encoding(Encoding::US_ASCII) + sub = "こんにちは" + sub.encoding.should == Encoding::UTF_8 + + result = s.bytesplice(1, 2, sub, 3, 3) + result.should == "hんlo" + result.encoding.should == Encoding::UTF_8 + end + + it "deals with a different encoded argument with str range" do + s = "こんにちは" + s.encoding.should == Encoding::UTF_8 + sub = "goodbye" + sub.force_encoding(Encoding::US_ASCII) + + result = s.bytesplice(3..5, sub, 0..2) + result.should == "こgooにちは" + result.encoding.should == Encoding::UTF_8 + + s = "hello" + s.force_encoding(Encoding::US_ASCII) + sub = "こんにちは" + sub.encoding.should == Encoding::UTF_8 + + result = s.bytesplice(1..2, sub, 3..5) + result.should == "hんlo" + result.encoding.should == Encoding::UTF_8 + end + end +end diff --git a/spec/ruby/core/string/capitalize_spec.rb b/spec/ruby/core/string/capitalize_spec.rb new file mode 100644 index 0000000000..5e59b656c5 --- /dev/null +++ b/spec/ruby/core/string/capitalize_spec.rb @@ -0,0 +1,207 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#capitalize" do + it "returns a copy of self with the first character converted to uppercase and the remainder 
to lowercase" do + "".capitalize.should == "" + "h".capitalize.should == "H" + "H".capitalize.should == "H" + "hello".capitalize.should == "Hello" + "HELLO".capitalize.should == "Hello" + "123ABC".capitalize.should == "123abc" + "abcdef"[1...-1].capitalize.should == "Bcde" + end + + describe "full Unicode case mapping" do + it "works for all of Unicode with no option" do + "äöÜ".capitalize.should == "Äöü" + end + + it "only capitalizes the first resulting character when upcasing a character produces a multi-character sequence" do + "ß".capitalize.should == "Ss" + end + + it "updates string metadata" do + capitalized = "ßeT".capitalize + + capitalized.should == "Sset" + capitalized.size.should == 4 + capitalized.bytesize.should == 4 + capitalized.ascii_only?.should be_true + end + end + + describe "ASCII-only case mapping" do + it "does not capitalize non-ASCII characters" do + "ßet".capitalize(:ascii).should == "ßet" + end + + it "handles non-ASCII substrings properly" do + "garçon"[1...-1].capitalize(:ascii).should == "Arço" + end + end + + describe "full Unicode case mapping adapted for Turkic languages" do + it "capitalizes ASCII characters according to Turkic semantics" do + "iSa".capitalize(:turkic).should == "İsa" + end + + it "allows Lithuanian as an extra option" do + "iSa".capitalize(:turkic, :lithuanian).should == "İsa" + end + + it "does not allow any other additional option" do + -> { "iSa".capitalize(:turkic, :ascii) }.should raise_error(ArgumentError) + end + end + + describe "full Unicode case mapping adapted for Lithuanian" do + it "currently works the same as full Unicode case mapping" do + "iß".capitalize(:lithuanian).should == "Iß" + end + + it "allows Turkic as an extra option (and applies Turkic semantics)" do + "iß".capitalize(:lithuanian, :turkic).should == "İß" + end + + it "does not allow any other additional option" do + -> { "iß".capitalize(:lithuanian, :ascii) }.should raise_error(ArgumentError) + end + end + + it "does not allow the 
:fold option for upcasing" do + -> { "abc".capitalize(:fold) }.should raise_error(ArgumentError) + end + + it "does not allow invalid options" do + -> { "abc".capitalize(:invalid_option) }.should raise_error(ArgumentError) + end + + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("hello").capitalize.should be_an_instance_of(String) + StringSpecs::MyString.new("Hello").capitalize.should be_an_instance_of(String) + end + + it "returns a String in the same encoding as self" do + "h".encode("US-ASCII").capitalize.encoding.should == Encoding::US_ASCII + end +end + +describe "String#capitalize!" do + it "capitalizes self in place" do + a = +"hello" + a.capitalize!.should equal(a) + a.should == "Hello" + end + + it "modifies self in place for non-ascii-compatible encodings" do + a = "heLLo".encode("utf-16le") + a.capitalize! + a.should == "Hello".encode("utf-16le") + end + + describe "full Unicode case mapping" do + it "modifies self in place for all of Unicode with no option" do + a = +"äöÜ" + a.capitalize! + a.should == "Äöü" + end + + it "only capitalizes the first resulting character when upcasing a character produces a multi-character sequence" do + a = +"ß" + a.capitalize! + a.should == "Ss" + end + + it "works for non-ascii-compatible encodings" do + a = "äöü".encode("utf-16le") + a.capitalize! + a.should == "Äöü".encode("utf-16le") + end + + it "updates string metadata" do + capitalized = +"ßeT" + capitalized.capitalize! 
+ + capitalized.should == "Sset" + capitalized.size.should == 4 + capitalized.bytesize.should == 4 + capitalized.ascii_only?.should be_true + end + end + + describe "modifies self in place for ASCII-only case mapping" do + it "does not capitalize non-ASCII characters" do + a = +"ßet" + a.capitalize!(:ascii) + a.should == "ßet" + end + + it "works for non-ascii-compatible encodings" do + a = "aBc".encode("utf-16le") + a.capitalize!(:ascii) + a.should == "Abc".encode("utf-16le") + end + end + + describe "modifies self in place for full Unicode case mapping adapted for Turkic languages" do + it "capitalizes ASCII characters according to Turkic semantics" do + a = +"iSa" + a.capitalize!(:turkic) + a.should == "İsa" + end + + it "allows Lithuanian as an extra option" do + a = +"iSa" + a.capitalize!(:turkic, :lithuanian) + a.should == "İsa" + end + + it "does not allow any other additional option" do + -> { a = "iSa"; a.capitalize!(:turkic, :ascii) }.should raise_error(ArgumentError) + end + end + + describe "modifies self in place for full Unicode case mapping adapted for Lithuanian" do + it "currently works the same as full Unicode case mapping" do + a = +"iß" + a.capitalize!(:lithuanian) + a.should == "Iß" + end + + it "allows Turkic as an extra option (and applies Turkic semantics)" do + a = +"iß" + a.capitalize!(:lithuanian, :turkic) + a.should == "İß" + end + + it "does not allow any other additional option" do + -> { a = "iß"; a.capitalize!(:lithuanian, :ascii) }.should raise_error(ArgumentError) + end + end + + it "does not allow the :fold option for upcasing" do + -> { a = "abc"; a.capitalize!(:fold) }.should raise_error(ArgumentError) + end + + it "does not allow invalid options" do + -> { a = "abc"; a.capitalize!(:invalid_option) }.should raise_error(ArgumentError) + end + + it "returns nil when no changes are made" do + a = +"Hello" + a.capitalize!.should == nil + a.should == "Hello" + + (+"").capitalize!.should == nil + (+"H").capitalize!.should == nil + end 
+ + it "raises a FrozenError when self is frozen" do + ["", "Hello", "hello"].each do |a| + a.freeze + -> { a.capitalize! }.should raise_error(FrozenError) + end + end +end diff --git a/spec/ruby/core/string/case_compare_spec.rb b/spec/ruby/core/string/case_compare_spec.rb new file mode 100644 index 0000000000..b83d1adb91 --- /dev/null +++ b/spec/ruby/core/string/case_compare_spec.rb @@ -0,0 +1,8 @@ +require_relative '../../spec_helper' +require_relative 'shared/eql' +require_relative 'shared/equal_value' + +describe "String#===" do + it_behaves_like :string_eql_value, :=== + it_behaves_like :string_equal_value, :=== +end diff --git a/spec/ruby/core/string/casecmp_spec.rb b/spec/ruby/core/string/casecmp_spec.rb new file mode 100644 index 0000000000..81ebea557c --- /dev/null +++ b/spec/ruby/core/string/casecmp_spec.rb @@ -0,0 +1,204 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#casecmp independent of case" do + it "returns -1 when less than other" do + "a".casecmp("b").should == -1 + "A".casecmp("b").should == -1 + end + + it "returns 0 when equal to other" do + "a".casecmp("a").should == 0 + "A".casecmp("a").should == 0 + end + + it "returns 1 when greater than other" do + "b".casecmp("a").should == 1 + "B".casecmp("a").should == 1 + end + + it "tries to convert other to string using to_str" do + other = mock('x') + other.should_receive(:to_str).and_return("abc") + + "abc".casecmp(other).should == 0 + end + + it "returns nil if other can't be converted to a string" do + "abc".casecmp(mock('abc')).should be_nil + end + + it "returns nil if incompatible encodings" do + "あれ".casecmp("れ".encode(Encoding::EUC_JP)).should be_nil + end + + describe "in UTF-8 mode" do + describe "for non-ASCII characters" do + before :each do + @upper_a_tilde = "Ã" + @lower_a_tilde = "ã" + @upper_a_umlaut = "Ä" + @lower_a_umlaut = "ä" + end + + it "returns -1 when numerically less than other" do + 
@upper_a_tilde.casecmp(@lower_a_tilde).should == -1 + @upper_a_tilde.casecmp(@upper_a_umlaut).should == -1 + end + + it "returns 0 when numerically equal to other" do + @upper_a_tilde.casecmp(@upper_a_tilde).should == 0 + end + + it "returns 1 when numerically greater than other" do + @lower_a_umlaut.casecmp(@upper_a_umlaut).should == 1 + @lower_a_umlaut.casecmp(@lower_a_tilde).should == 1 + end + end + + describe "for ASCII characters" do + it "returns -1 when less than other" do + "a".casecmp("b").should == -1 + "A".casecmp("b").should == -1 + end + + it "returns 0 when equal to other" do + "a".casecmp("a").should == 0 + "A".casecmp("a").should == 0 + end + + it "returns 1 when greater than other" do + "b".casecmp("a").should == 1 + "B".casecmp("a").should == 1 + end + end + end + + describe "for non-ASCII characters" do + before :each do + @upper_a_tilde = "\xc3" + @lower_a_tilde = "\xe3" + end + + it "returns -1 when numerically less than other" do + @upper_a_tilde.casecmp(@lower_a_tilde).should == -1 + end + + it "returns 0 when equal to other" do + @upper_a_tilde.casecmp("\xc3").should == 0 + end + + it "returns 1 when numerically greater than other" do + @lower_a_tilde.casecmp(@upper_a_tilde).should == 1 + end + + it "does not case fold" do + "ß".casecmp("ss").should == 1 + end + end + + describe "when comparing a subclass instance" do + it "returns -1 when less than other" do + b = StringSpecs::MyString.new "b" + "a".casecmp(b).should == -1 + "A".casecmp(b).should == -1 + end + + it "returns 0 when equal to other" do + a = StringSpecs::MyString.new "a" + "a".casecmp(a).should == 0 + "A".casecmp(a).should == 0 + end + + it "returns 1 when greater than other" do + a = StringSpecs::MyString.new "a" + "b".casecmp(a).should == 1 + "B".casecmp(a).should == 1 + end + end + + it "returns 0 for empty strings in different encodings" do + ''.b.casecmp('').should == 0 + ''.b.casecmp(''.encode("UTF-32LE")).should == 0 + end +end + +describe 'String#casecmp? 
independent of case' do + it 'returns true when equal to other' do + 'abc'.casecmp?('abc').should == true + 'abc'.casecmp?('ABC').should == true + end + + it 'returns false when not equal to other' do + 'abc'.casecmp?('DEF').should == false + 'abc'.casecmp?('def').should == false + end + + it "tries to convert other to string using to_str" do + other = mock('x') + other.should_receive(:to_str).and_return("abc") + + "abc".casecmp?(other).should == true + end + + it "returns nil if incompatible encodings" do + "あれ".casecmp?("れ".encode(Encoding::EUC_JP)).should be_nil + end + + describe 'for UNICODE characters' do + it 'returns true when downcase(:fold) on unicode' do + 'äöü'.casecmp?('ÄÖÜ').should == true + end + end + + describe "when comparing a subclass instance" do + it 'returns true when equal to other' do + a = StringSpecs::MyString.new "a" + 'a'.casecmp?(a).should == true + 'A'.casecmp?(a).should == true + end + + it 'returns false when not equal to other' do + b = StringSpecs::MyString.new "a" + 'b'.casecmp?(b).should == false + 'B'.casecmp?(b).should == false + end + end + + describe "in UTF-8 mode" do + describe "for non-ASCII characters" do + before :each do + @upper_a_tilde = "Ã" + @lower_a_tilde = "ã" + @upper_a_umlaut = "Ä" + @lower_a_umlaut = "ä" + end + + it "returns true when they are the same with normalized case" do + @upper_a_tilde.casecmp?(@lower_a_tilde).should == true + end + + it "returns false when they are unrelated" do + @upper_a_tilde.casecmp?(@upper_a_umlaut).should == false + end + + it "returns true when they have the same bytes" do + @upper_a_tilde.casecmp?(@upper_a_tilde).should == true + end + end + end + + it "case folds" do + "ß".casecmp?("ss").should be_true + end + + it "returns nil if other can't be converted to a string" do + "abc".casecmp?(mock('abc')).should be_nil + end + + it "returns true for empty strings in different encodings" do + ''.b.should.casecmp?('') + ''.b.should.casecmp?(''.encode("UTF-32LE")) + end +end diff 
--git a/spec/ruby/core/string/center_spec.rb b/spec/ruby/core/string/center_spec.rb new file mode 100644 index 0000000000..1667b59327 --- /dev/null +++ b/spec/ruby/core/string/center_spec.rb @@ -0,0 +1,117 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#center with length, padding" do + it "returns a new string of specified length with self centered and padded with padstr" do + "one".center(9, '.').should == "...one..." + "hello".center(20, '123').should == "1231231hello12312312" + "middle".center(13, '-').should == "---middle----" + + "".center(1, "abcd").should == "a" + "".center(2, "abcd").should == "aa" + "".center(3, "abcd").should == "aab" + "".center(4, "abcd").should == "abab" + "".center(6, "xy").should == "xyxxyx" + "".center(11, "12345").should == "12345123451" + + "|".center(2, "abcd").should == "|a" + "|".center(3, "abcd").should == "a|a" + "|".center(4, "abcd").should == "a|ab" + "|".center(5, "abcd").should == "ab|ab" + "|".center(6, "xy").should == "xy|xyx" + "|".center(7, "xy").should == "xyx|xyx" + "|".center(11, "12345").should == "12345|12345" + "|".center(12, "12345").should == "12345|123451" + + "||".center(3, "abcd").should == "||a" + "||".center(4, "abcd").should == "a||a" + "||".center(5, "abcd").should == "a||ab" + "||".center(6, "abcd").should == "ab||ab" + "||".center(8, "xy").should == "xyx||xyx" + "||".center(12, "12345").should == "12345||12345" + "||".center(13, "12345").should == "12345||123451" + end + + it "pads with whitespace if no padstr is given" do + "two".center(5).should == " two " + "hello".center(20).should == " hello " + end + + it "returns self if it's longer than or as long as the specified length" do + "".center(0).should == "" + "".center(-1).should == "" + "hello".center(4).should == "hello" + "hello".center(-1).should == "hello" + "this".center(3).should == "this" + "radiology".center(8, '-').should == "radiology" + end + + it "calls 
#to_int to convert length to an integer" do + "_".center(3.8, "^").should == "^_^" + + obj = mock('3') + obj.should_receive(:to_int).and_return(3) + + "_".center(obj, "o").should == "o_o" + end + + it "raises a TypeError when length can't be converted to an integer" do + -> { "hello".center("x") }.should raise_error(TypeError) + -> { "hello".center("x", "y") }.should raise_error(TypeError) + -> { "hello".center([]) }.should raise_error(TypeError) + -> { "hello".center(mock('x')) }.should raise_error(TypeError) + end + + it "calls #to_str to convert padstr to a String" do + padstr = mock('123') + padstr.should_receive(:to_str).and_return("123") + + "hello".center(20, padstr).should == "1231231hello12312312" + end + + it "raises a TypeError when padstr can't be converted to a string" do + -> { "hello".center(20, 100) }.should raise_error(TypeError) + -> { "hello".center(20, []) }.should raise_error(TypeError) + -> { "hello".center(20, mock('x')) }.should raise_error(TypeError) + end + + it "raises an ArgumentError if padstr is empty" do + -> { "hello".center(10, "") }.should raise_error(ArgumentError) + -> { "hello".center(0, "") }.should raise_error(ArgumentError) + end + + it "returns String instances when called on subclasses" do + StringSpecs::MyString.new("").center(10).should be_an_instance_of(String) + StringSpecs::MyString.new("foo").center(10).should be_an_instance_of(String) + StringSpecs::MyString.new("foo").center(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) + + "".center(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) + "foo".center(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) + end + + describe "with width" do + it "returns a String in the same encoding as the original" do + str = "abc".dup.force_encoding Encoding::IBM437 + result = str.center 6 + result.should == " abc " + result.encoding.should equal(Encoding::IBM437) + end + end + + describe "with width, pattern" do + it "returns a 
String in the compatible encoding" do + str = "abc".dup.force_encoding Encoding::IBM437 + result = str.center 6, "あ" + result.should == "あabcああ" + result.encoding.should equal(Encoding::UTF_8) + end + + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + pat = "ア".encode Encoding::EUC_JP + -> do + "あれ".center 5, pat + end.should raise_error(Encoding::CompatibilityError) + end + end +end diff --git a/spec/ruby/core/string/chars_spec.rb b/spec/ruby/core/string/chars_spec.rb new file mode 100644 index 0000000000..ee85430574 --- /dev/null +++ b/spec/ruby/core/string/chars_spec.rb @@ -0,0 +1,16 @@ +require_relative "../../spec_helper" +require_relative 'shared/chars' + +describe "String#chars" do + it_behaves_like :string_chars, :chars + + it "returns an array when no block given" do + "hello".chars.should == ['h', 'e', 'l', 'l', 'o'] + end + + it "returns Strings in the same encoding as self" do + "hello".encode("US-ASCII").chars.each do |c| + c.encoding.should == Encoding::US_ASCII + end + end +end diff --git a/spec/ruby/core/string/chilled_string_spec.rb b/spec/ruby/core/string/chilled_string_spec.rb new file mode 100644 index 0000000000..73d055cbdf --- /dev/null +++ b/spec/ruby/core/string/chilled_string_spec.rb @@ -0,0 +1,151 @@ +require_relative '../../spec_helper' + +describe "chilled String" do + guard -> { ruby_version_is "3.4" and !"test".equal?("test") } do + describe "chilled string literals" do + + describe "#frozen?" 
do + it "returns false" do + "chilled".frozen?.should == false + end + end + + describe "#-@" do + it "returns a different instance" do + input = "chilled" + interned = (-input) + interned.frozen?.should == true + interned.object_id.should_not == input.object_id + end + end + + describe "#+@" do + it "returns a different instance" do + input = "chilled" + duped = (+input) + duped.frozen?.should == false + duped.object_id.should_not == input.object_id + end + end + + describe "#clone" do + it "preserves chilled status" do + input = "chilled".clone + -> { + input << "-mutated" + }.should complain(/literal string will be frozen in the future/) + input.should == "chilled-mutated" + end + end + + describe "mutation" do + it "emits a warning" do + input = "chilled" + -> { + input << "-mutated" + }.should complain(/literal string will be frozen in the future/) + input.should == "chilled-mutated" + end + + it "emits a warning for concatenated strings" do + input = "still" "+chilled" + -> { + input << "-mutated" + }.should complain(/literal string will be frozen in the future/) + input.should == "still+chilled-mutated" + end + + it "emits a warning on singleton_class creation" do + -> { + "chilled".singleton_class + }.should complain(/literal string will be frozen in the future/) + end + + it "emits a warning on instance variable assignment" do + -> { + "chilled".instance_variable_set(:@ivar, 42) + }.should complain(/literal string will be frozen in the future/) + end + + it "raises FrozenError after the string was explicitly frozen" do + input = "chilled" + input.freeze + -> { + -> { + input << "mutated" + }.should raise_error(FrozenError) + }.should_not complain(/literal string will be frozen in the future/) + end + end + end + + describe "chilled strings returned by Symbol#to_s" do + + describe "#frozen?" 
do + it "returns false" do + :chilled.to_s.frozen?.should == false + end + end + + describe "#-@" do + it "returns a different instance" do + input = :chilled.to_s + interned = (-input) + interned.frozen?.should == true + interned.object_id.should_not == input.object_id + end + end + + describe "#+@" do + it "returns a different instance" do + input = :chilled.to_s + duped = (+input) + duped.frozen?.should == false + duped.object_id.should_not == input.object_id + end + end + + describe "#clone" do + it "preserves chilled status" do + input = :chilled.to_s.clone + -> { + input << "-mutated" + }.should complain(/string returned by :chilled\.to_s will be frozen in the future/) + input.should == "chilled-mutated" + end + end + + describe "mutation" do + it "emits a warning" do + input = :chilled.to_s + -> { + input << "-mutated" + }.should complain(/string returned by :chilled\.to_s will be frozen in the future/) + input.should == "chilled-mutated" + end + + it "emits a warning on singleton_class creation" do + -> { + :chilled.to_s.singleton_class + }.should complain(/string returned by :chilled\.to_s will be frozen in the future/) + end + + it "emits a warning on instance variable assignment" do + -> { + :chilled.to_s.instance_variable_set(:@ivar, 42) + }.should complain(/string returned by :chilled\.to_s will be frozen in the future/) + end + + it "raises FrozenError after the string was explicitly frozen" do + input = :chilled.to_s + input.freeze + -> { + -> { + input << "mutated" + }.should raise_error(FrozenError) + }.should_not complain(/string returned by :chilled\.to_s will be frozen in the future/) + end + end + end + end +end diff --git a/spec/ruby/core/string/chomp_spec.rb b/spec/ruby/core/string/chomp_spec.rb new file mode 100644 index 0000000000..d27c84c6f6 --- /dev/null +++ b/spec/ruby/core/string/chomp_spec.rb @@ -0,0 +1,366 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 
'fixtures/classes' + +describe "String#chomp" do + describe "when passed no argument" do + before do + # Ensure that $/ is set to the default value + @verbose, $VERBOSE = $VERBOSE, nil + @dollar_slash, $/ = $/, "\n" + end + + after do + $/ = @dollar_slash + $VERBOSE = @verbose + end + + it "does not modify a String with no trailing carriage return or newline" do + "abc".chomp.should == "abc" + end + + it "returns a copy of the String when it is not modified" do + str = "abc" + str.chomp.should_not equal(str) + end + + it "removes one trailing newline" do + "abc\n\n".chomp.should == "abc\n" + end + + it "removes one trailing carriage return" do + "abc\r\r".chomp.should == "abc\r" + end + + it "removes one trailing carriage return, newline pair" do + "abc\r\n\r\n".chomp.should == "abc\r\n" + end + + it "returns an empty String when self is empty" do + "".chomp.should == "" + end + + it "returns a String in the same encoding as self" do + "abc\n\n".encode("US-ASCII").chomp.encoding.should == Encoding::US_ASCII + end + + it "returns String instances when called on a subclass" do + str = StringSpecs::MyString.new("hello\n").chomp + str.should be_an_instance_of(String) + end + + it "removes trailing characters that match $/ when it has been assigned a value" do + $/ = "cdef" + "abcdef".chomp.should == "ab" + end + + it "removes one trailing newline for string with invalid encoding" do + "\xa0\xa1\n".chomp.should == "\xa0\xa1" + end + end + + describe "when passed nil" do + it "does not modify the String" do + "abc\r\n".chomp(nil).should == "abc\r\n" + end + + it "returns a copy of the String" do + str = "abc" + str.chomp(nil).should_not equal(str) + end + + it "returns an empty String when self is empty" do + "".chomp(nil).should == "" + end + end + + describe "when passed ''" do + it "removes a final newline" do + "abc\n".chomp("").should == "abc" + end + + it "removes a final carriage return, newline" do + "abc\r\n".chomp("").should == "abc" + end + + it "does not 
remove a final carriage return" do + "abc\r".chomp("").should == "abc\r" + end + + it "removes more than one trailing newlines" do + "abc\n\n\n".chomp("").should == "abc" + end + + it "removes more than one trailing carriage return, newline pairs" do + "abc\r\n\r\n\r\n".chomp("").should == "abc" + end + + it "returns an empty String when self is empty" do + "".chomp("").should == "" + end + + it "removes one trailing newline for string with invalid encoding" do + "\xa0\xa1\n".chomp("").should == "\xa0\xa1" + end + end + + describe "when passed '\\n'" do + it "removes one trailing newline" do + "abc\n\n".chomp("\n").should == "abc\n" + end + + it "removes one trailing carriage return" do + "abc\r\r".chomp("\n").should == "abc\r" + end + + it "removes one trailing carriage return, newline pair" do + "abc\r\n\r\n".chomp("\n").should == "abc\r\n" + end + + it "returns an empty String when self is empty" do + "".chomp("\n").should == "" + end + end + + describe "when passed an Object" do + it "calls #to_str to convert to a String" do + arg = mock("string chomp") + arg.should_receive(:to_str).and_return("bc") + "abc".chomp(arg).should == "a" + end + + it "raises a TypeError if #to_str does not return a String" do + arg = mock("string chomp") + arg.should_receive(:to_str).and_return(1) + -> { "abc".chomp(arg) }.should raise_error(TypeError) + end + end + + describe "when passed a String" do + it "removes the trailing characters if they match the argument" do + "abcabc".chomp("abc").should == "abc" + end + + it "does not modify the String if the argument does not match the trailing characters" do + "abc".chomp("def").should == "abc" + end + + it "returns an empty String when self is empty" do + "".chomp("abc").should == "" + end + + it "returns an empty String when the argument equals self" do + "abc".chomp("abc").should == "" + end + end +end + +describe "String#chomp!" 
do + describe "when passed no argument" do + before do + # Ensure that $/ is set to the default value + @verbose, $VERBOSE = $VERBOSE, nil + @dollar_slash, $/ = $/, "\n" + end + + after do + $/ = @dollar_slash + $VERBOSE = @verbose + end + + it "modifies self" do + str = "abc\n" + str.chomp!.should equal(str) + end + + it "returns nil if self is not modified" do + "abc".chomp!.should be_nil + end + + it "removes one trailing newline" do + "abc\n\n".chomp!.should == "abc\n" + end + + it "removes one trailing carriage return" do + "abc\r\r".chomp!.should == "abc\r" + end + + it "removes one trailing carriage return, newline pair" do + "abc\r\n\r\n".chomp!.should == "abc\r\n" + end + + it "returns nil when self is empty" do + "".chomp!.should be_nil + end + + it "returns subclass instances when called on a subclass" do + str = StringSpecs::MyString.new("hello\n").chomp! + str.should be_an_instance_of(StringSpecs::MyString) + end + + it "removes trailing characters that match $/ when it has been assigned a value" do + $/ = "cdef" + "abcdef".chomp!.should == "ab" + end + end + + describe "when passed nil" do + it "returns nil" do + "abc\r\n".chomp!(nil).should be_nil + end + + it "returns nil when self is empty" do + "".chomp!(nil).should be_nil + end + end + + describe "when passed ''" do + it "removes a final newline" do + "abc\n".chomp!("").should == "abc" + end + + it "removes a final carriage return, newline" do + "abc\r\n".chomp!("").should == "abc" + end + + it "does not remove a final carriage return" do + "abc\r".chomp!("").should be_nil + end + + it "removes more than one trailing newlines" do + "abc\n\n\n".chomp!("").should == "abc" + end + + it "removes more than one trailing carriage return, newline pairs" do + "abc\r\n\r\n\r\n".chomp!("").should == "abc" + end + + it "returns nil when self is empty" do + "".chomp!("").should be_nil + end + end + + describe "when passed '\\n'" do + it "removes one trailing newline" do + "abc\n\n".chomp!("\n").should == 
"abc\n" + end + + it "removes one trailing carriage return" do + "abc\r\r".chomp!("\n").should == "abc\r" + end + + it "removes one trailing carriage return, newline pair" do + "abc\r\n\r\n".chomp!("\n").should == "abc\r\n" + end + + it "returns nil when self is empty" do + "".chomp!("\n").should be_nil + end + end + + describe "when passed an Object" do + it "calls #to_str to convert to a String" do + arg = mock("string chomp") + arg.should_receive(:to_str).and_return("bc") + "abc".chomp!(arg).should == "a" + end + + it "raises a TypeError if #to_str does not return a String" do + arg = mock("string chomp") + arg.should_receive(:to_str).and_return(1) + -> { "abc".chomp!(arg) }.should raise_error(TypeError) + end + end + + describe "when passed a String" do + it "removes the trailing characters if they match the argument" do + "abcabc".chomp!("abc").should == "abc" + end + + it "returns nil if the argument does not match the trailing characters" do + "abc".chomp!("def").should be_nil + end + + it "returns nil when self is empty" do + "".chomp!("abc").should be_nil + end + end + + it "raises a FrozenError on a frozen instance when it is modified" do + a = "string\n\r" + a.freeze + + -> { a.chomp! 
}.should raise_error(FrozenError) + end + + # see [ruby-core:23666] + it "raises a FrozenError on a frozen instance when it would not be modified" do + a = "string\n\r" + a.freeze + -> { a.chomp!(nil) }.should raise_error(FrozenError) + -> { a.chomp!("x") }.should raise_error(FrozenError) + end +end + +describe "String#chomp" do + before :each do + @verbose, $VERBOSE = $VERBOSE, nil + @before_separator = $/ + end + + after :each do + $/ = @before_separator + $VERBOSE = @verbose + end + + it "does not modify a multi-byte character" do + "あれ".chomp.should == "あれ" + end + + it "removes the final carriage return, newline from a multibyte String" do + "あれ\r\n".chomp.should == "あれ" + end + + it "removes the final carriage return, newline from a non-ASCII String" do + str = "abc\r\n".encode "utf-32be" + str.chomp.should == "abc".encode("utf-32be") + end + + it "removes the final carriage return, newline from a non-ASCII String when the record separator is changed" do + $/ = "\n".encode("utf-8") + str = "abc\r\n".encode "utf-32be" + str.chomp.should == "abc".encode("utf-32be") + end +end + +describe "String#chomp!" 
do + before :each do + @verbose, $VERBOSE = $VERBOSE, nil + @before_separator = $/ + end + + after :each do + $/ = @before_separator + $VERBOSE = @verbose + end + + it "returns nil when the String is not modified" do + "あれ".chomp!.should be_nil + end + + it "removes the final carriage return, newline from a multibyte String" do + "あれ\r\n".chomp!.should == "あれ" + end + + it "removes the final carriage return, newline from a non-ASCII String" do + str = "abc\r\n".encode "utf-32be" + str.chomp!.should == "abc".encode("utf-32be") + end + + it "removes the final carriage return, newline from a non-ASCII String when the record separator is changed" do + $/ = "\n".encode("utf-8") + str = "abc\r\n".encode "utf-32be" + str.chomp!.should == "abc".encode("utf-32be") + end +end diff --git a/spec/ruby/core/string/chop_spec.rb b/spec/ruby/core/string/chop_spec.rb new file mode 100644 index 0000000000..99c2c82190 --- /dev/null +++ b/spec/ruby/core/string/chop_spec.rb @@ -0,0 +1,119 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#chop" do + it "removes the final character" do + "abc".chop.should == "ab" + end + + it "removes the final carriage return" do + "abc\r".chop.should == "abc" + end + + it "removes the final newline" do + "abc\n".chop.should == "abc" + end + + it "removes the final carriage return, newline" do + "abc\r\n".chop.should == "abc" + end + + it "removes the carriage return, newline if they are the only characters" do + "\r\n".chop.should == "" + end + + it "does not remove more than the final carriage return, newline" do + "abc\r\n\r\n".chop.should == "abc\r\n" + end + + it "removes a multi-byte character" do + "あれ".chop.should == "あ" + end + + it "removes the final carriage return, newline from a multibyte String" do + "あれ\r\n".chop.should == "あれ" + end + + it "removes the final carriage return, newline from a non-ASCII String" do + str = 
"abc\r\n".encode "utf-32be" + str.chop.should == "abc".encode("utf-32be") + end + + it "returns an empty string when applied to an empty string" do + "".chop.should == "" + end + + it "returns a new string when applied to an empty string" do + s = "" + s.chop.should_not equal(s) + end + + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("hello\n").chop.should be_an_instance_of(String) + end + + it "returns a String in the same encoding as self" do + "abc\n\n".encode("US-ASCII").chop.encoding.should == Encoding::US_ASCII + end +end + +describe "String#chop!" do + it "removes the final character" do + "abc".chop!.should == "ab" + end + + it "removes the final carriage return" do + "abc\r".chop!.should == "abc" + end + + it "removes the final newline" do + "abc\n".chop!.should == "abc" + end + + it "removes the final carriage return, newline" do + "abc\r\n".chop!.should == "abc" + end + + it "removes the carriage return, newline if they are the only characters" do + "\r\n".chop!.should == "" + end + + it "does not remove more than the final carriage return, newline" do + "abc\r\n\r\n".chop!.should == "abc\r\n" + end + + it "removes a multi-byte character" do + "あれ".chop!.should == "あ" + end + + it "removes the final carriage return, newline from a multibyte String" do + "あれ\r\n".chop!.should == "あれ" + end + + it "removes the final carriage return, newline from a non-ASCII String" do + str = "abc\r\n".encode "utf-32be" + str.chop!.should == "abc".encode("utf-32be") + end + + it "returns self if modifications were made" do + str = "hello" + str.chop!.should equal(str) + end + + it "returns nil when called on an empty string" do + "".chop!.should be_nil + end + + it "raises a FrozenError on a frozen instance that is modified" do + -> { "string\n\r".freeze.chop! 
}.should raise_error(FrozenError) + end + + # see [ruby-core:23666] + it "raises a FrozenError on a frozen instance that would not be modified" do + a = "" + a.freeze + -> { a.chop! }.should raise_error(FrozenError) + end +end diff --git a/spec/ruby/core/string/chr_spec.rb b/spec/ruby/core/string/chr_spec.rb new file mode 100644 index 0000000000..9ed29542e6 --- /dev/null +++ b/spec/ruby/core/string/chr_spec.rb @@ -0,0 +1,42 @@ +require_relative '../../spec_helper' + +describe "String#chr" do + it "returns a copy of self" do + s = 'e' + s.should_not equal s.chr + end + + it "returns a String" do + 'glark'.chr.should be_an_instance_of(String) + end + + it "returns an empty String if self is an empty String" do + "".chr.should == "" + end + + it "returns a 1-character String" do + "glark".chr.size.should == 1 + end + + it "returns the character at the start of the String" do + "Goodbye, world".chr.should == "G" + end + + it "returns a String in the same encoding as self" do + "\x24".encode(Encoding::US_ASCII).chr.encoding.should == Encoding::US_ASCII + end + + it "understands multi-byte characters" do + s = "\u{9879}" + s.bytesize.should == 3 + s.chr.should == s + end + + it "understands Strings that contain a mixture of character widths" do + three = "\u{8082}" + three.bytesize.should == 3 + four = "\u{77082}" + four.bytesize.should == 4 + "#{three}#{four}".chr.should == three + end +end diff --git a/spec/ruby/core/string/clear_spec.rb b/spec/ruby/core/string/clear_spec.rb new file mode 100644 index 0000000000..152986fd0f --- /dev/null +++ b/spec/ruby/core/string/clear_spec.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: false +require_relative '../../spec_helper' + +describe "String#clear" do + before :each do + @s = "Jolene" + end + + it "sets self equal to the empty String" do + @s.clear + @s.should == "" + end + + it "returns self after emptying it" do + cleared = @s.clear + cleared.should == "" + cleared.should equal @s + end + + it "preserves its encoding" do + 
@s.encode!(Encoding::SHIFT_JIS) + @s.encoding.should == Encoding::SHIFT_JIS + @s.clear.encoding.should == Encoding::SHIFT_JIS + @s.encoding.should == Encoding::SHIFT_JIS + end + + it "works with multibyte Strings" do + s = "\u{9765}\u{876}" + s.clear + s.should == "" + end + + it "raises a FrozenError if self is frozen" do + @s.freeze + -> { @s.clear }.should raise_error(FrozenError) + -> { "".freeze.clear }.should raise_error(FrozenError) + end +end diff --git a/spec/ruby/core/string/clone_spec.rb b/spec/ruby/core/string/clone_spec.rb new file mode 100644 index 0000000000..a2ba2f9877 --- /dev/null +++ b/spec/ruby/core/string/clone_spec.rb @@ -0,0 +1,61 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#clone" do + before :each do + ScratchPad.clear + @obj = StringSpecs::InitializeString.new "string" + end + + it "calls #initialize_copy on the new instance" do + clone = @obj.clone + ScratchPad.recorded.should_not == @obj.object_id + ScratchPad.recorded.should == clone.object_id + end + + it "copies instance variables" do + clone = @obj.clone + clone.ivar.should == 1 + end + + it "copies singleton methods" do + def @obj.special() :the_one end + clone = @obj.clone + clone.special.should == :the_one + end + + it "copies modules included in the singleton class" do + class << @obj + include StringSpecs::StringModule + end + + clone = @obj.clone + clone.repr.should == 1 + end + + it "copies constants defined in the singleton class" do + class << @obj + CLONE = :clone + end + + clone = @obj.clone + (class << clone; CLONE; end).should == :clone + end + + it "copies frozen state" do + @obj.freeze.clone.frozen?.should be_true + "".freeze.clone.frozen?.should be_true + end + + it "does not modify the original string when changing cloned string" do + orig = "string"[0..100] + clone = orig.clone + orig[0] = 'x' + orig.should == "xtring" + clone.should == "string" + end + + it "returns a String in the same encoding as self" do + 
"a".encode("US-ASCII").clone.encoding.should == Encoding::US_ASCII + end +end diff --git a/spec/ruby/core/string/codepoints_spec.rb b/spec/ruby/core/string/codepoints_spec.rb new file mode 100644 index 0000000000..12a5bf5892 --- /dev/null +++ b/spec/ruby/core/string/codepoints_spec.rb @@ -0,0 +1,18 @@ +# encoding: binary +require_relative '../../spec_helper' +require_relative 'shared/codepoints' +require_relative 'shared/each_codepoint_without_block' + +describe "String#codepoints" do + it_behaves_like :string_codepoints, :codepoints + + it "returns an Array when no block is given" do + "abc".codepoints.should == [?a.ord, ?b.ord, ?c.ord] + end + + it "raises an ArgumentError when no block is given if self has an invalid encoding" do + s = "\xDF".dup.force_encoding(Encoding::UTF_8) + s.valid_encoding?.should be_false + -> { s.codepoints }.should raise_error(ArgumentError) + end +end diff --git a/spec/ruby/core/string/comparison_spec.rb b/spec/ruby/core/string/comparison_spec.rb new file mode 100644 index 0000000000..9db0cff5ee --- /dev/null +++ b/spec/ruby/core/string/comparison_spec.rb @@ -0,0 +1,112 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#<=> with String" do + it "compares individual characters based on their ascii value" do + ascii_order = Array.new(256) { |x| x.chr } + sort_order = ascii_order.sort + sort_order.should == ascii_order + end + + it "returns -1 when self is less than other" do + ("this" <=> "those").should == -1 + end + + it "returns 0 when self is equal to other" do + ("yep" <=> "yep").should == 0 + end + + it "returns 1 when self is greater than other" do + ("yoddle" <=> "griddle").should == 1 + end + + it "considers string that comes lexicographically first to be less if strings have same size" do + ("aba" <=> "abc").should == -1 + ("abc" <=> "aba").should == 1 + end + + it "doesn't consider shorter string to be less if longer string starts with shorter one" do + 
("abc" <=> "abcd").should == -1 + ("abcd" <=> "abc").should == 1 + end + + it "compares shorter string with corresponding number of first chars of longer string" do + ("abx" <=> "abcd").should == 1 + ("abcd" <=> "abx").should == -1 + end + + it "ignores subclass differences" do + a = "hello" + b = StringSpecs::MyString.new("hello") + + (a <=> b).should == 0 + (b <=> a).should == 0 + end + + it "returns 0 if self and other are bytewise identical and have the same encoding" do + ("ÄÖÜ" <=> "ÄÖÜ").should == 0 + end + + it "returns 0 if self and other are bytewise identical and have the same encoding" do + ("ÄÖÜ" <=> "ÄÖÜ").should == 0 + end + + it "returns -1 if self is bytewise less than other" do + ("ÄÖÛ" <=> "ÄÖÜ").should == -1 + end + + it "returns 1 if self is bytewise greater than other" do + ("ÄÖÜ" <=> "ÄÖÛ").should == 1 + end + + it "ignores encoding difference" do + ("ÄÖÛ".dup.force_encoding("utf-8") <=> "ÄÖÜ".dup.force_encoding("iso-8859-1")).should == -1 + ("ÄÖÜ".dup.force_encoding("utf-8") <=> "ÄÖÛ".dup.force_encoding("iso-8859-1")).should == 1 + end + + it "returns 0 with identical ASCII-compatible bytes of different encodings" do + ("abc".dup.force_encoding("utf-8") <=> "abc".dup.force_encoding("iso-8859-1")).should == 0 + end + + it "compares the indices of the encodings when the strings have identical non-ASCII-compatible bytes" do + xff_1 = [0xFF].pack('C').force_encoding("utf-8") + xff_2 = [0xFF].pack('C').force_encoding("iso-8859-1") + (xff_1 <=> xff_2).should == -1 + (xff_2 <=> xff_1).should == 1 + end + + it "returns 0 when comparing 2 empty strings but one is not ASCII-compatible" do + ("" <=> "".dup.force_encoding('iso-2022-jp')).should == 0 + end +end + +# Note: This is inconsistent with Array#<=> which calls #to_ary instead of +# just using it as an indicator. 
+describe "String#<=>" do + it "returns nil if its argument provides neither #to_str nor #<=>" do + ("abc" <=> mock('x')).should be_nil + end + + it "uses the result of calling #to_str for comparison when #to_str is defined" do + obj = mock('x') + obj.should_receive(:to_str).and_return("aaa") + + ("abc" <=> obj).should == 1 + end + + it "uses the result of calling #<=> on its argument when #<=> is defined but #to_str is not" do + obj = mock('x') + obj.should_receive(:<=>).and_return(-1) + + ("abc" <=> obj).should == 1 + end + + it "returns nil if argument also uses an inverse comparison for <=>" do + obj = mock('x') + def obj.<=>(other); other <=> self; end + obj.should_receive(:<=>).once + + ("abc" <=> obj).should be_nil + end +end diff --git a/spec/ruby/core/string/concat_spec.rb b/spec/ruby/core/string/concat_spec.rb new file mode 100644 index 0000000000..cbd7df54e2 --- /dev/null +++ b/spec/ruby/core/string/concat_spec.rb @@ -0,0 +1,27 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/concat' + +describe "String#concat" do + it_behaves_like :string_concat, :concat + it_behaves_like :string_concat_encoding, :concat + it_behaves_like :string_concat_type_coercion, :concat + + it "takes multiple arguments" do + str = +"hello " + str.concat "wo", "", "rld" + str.should == "hello world" + end + + it "concatenates the initial value when given arguments contain 2 self" do + str = +"hello" + str.concat str, str + str.should == "hellohellohello" + end + + it "returns self when given no arguments" do + str = +"hello" + str.concat.should equal(str) + str.should == "hello" + end +end diff --git a/spec/ruby/core/string/count_spec.rb b/spec/ruby/core/string/count_spec.rb new file mode 100644 index 0000000000..e614e901dd --- /dev/null +++ b/spec/ruby/core/string/count_spec.rb @@ -0,0 +1,105 @@ +# encoding: binary +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#count" do + it 
"counts occurrences of chars from the intersection of the specified sets" do + s = "hello\nworld\x00\x00" + + s.count(s).should == s.size + s.count("lo").should == 5 + s.count("eo").should == 3 + s.count("l").should == 3 + s.count("\n").should == 1 + s.count("\x00").should == 2 + + s.count("").should == 0 + "".count("").should == 0 + + s.count("l", "lo").should == s.count("l") + s.count("l", "lo", "o").should == s.count("") + s.count("helo", "hel", "h").should == s.count("h") + s.count("helo", "", "x").should == 0 + end + + it "raises an ArgumentError when given no arguments" do + -> { "hell yeah".count }.should raise_error(ArgumentError) + end + + it "negates sets starting with ^" do + s = "^hello\nworld\x00\x00" + + s.count("^").should == 1 # no negation, counts ^ + + s.count("^leh").should == 9 + s.count("^o").should == 12 + + s.count("helo", "^el").should == s.count("ho") + s.count("aeiou", "^e").should == s.count("aiou") + + "^_^".count("^^").should == 1 + "oa^_^o".count("a^").should == 3 + end + + it "counts all chars in a sequence" do + s = "hel-[()]-lo012^" + + s.count("\x00-\xFF").should == s.size + s.count("ej-m").should == 3 + s.count("e-h").should == 2 + + # no sequences + s.count("-").should == 2 + s.count("e-").should == s.count("e") + s.count("-") + s.count("-h").should == s.count("h") + s.count("-") + + s.count("---").should == s.count("-") + + # see an ASCII table for reference + s.count("--2").should == s.count("-./012") + s.count("(--").should == s.count("()*+,-") + s.count("A-a").should == s.count("A-Z[\\]^_`a") + + # negated sequences + s.count("^e-h").should == s.size - s.count("e-h") + s.count("^^-^").should == s.size - s.count("^") + s.count("^---").should == s.size - s.count("-") + + "abcdefgh".count("a-ce-fh").should == 6 + "abcdefgh".count("he-fa-c").should == 6 + "abcdefgh".count("e-fha-c").should == 6 + + "abcde".count("ac-e").should == 4 + "abcde".count("^ac-e").should == 1 + end + + it "raises if the given sequences are invalid" do + 
s = "hel-[()]-lo012^" + + -> { s.count("h-e") }.should raise_error(ArgumentError) + -> { s.count("^h-e") }.should raise_error(ArgumentError) + end + + it 'returns the number of occurrences of a multi-byte character' do + str = "\u{2605}" + str.count(str).should == 1 + "asd#{str}zzz#{str}ggg".count(str).should == 2 + end + + it "calls #to_str to convert each set arg to a String" do + other_string = mock('lo') + other_string.should_receive(:to_str).and_return("lo") + + other_string2 = mock('o') + other_string2.should_receive(:to_str).and_return("o") + + s = "hello world" + s.count(other_string, other_string2).should == s.count("o") + end + + it "raises a TypeError when a set arg can't be converted to a string" do + -> { "hello world".count(100) }.should raise_error(TypeError) + -> { "hello world".count([]) }.should raise_error(TypeError) + -> { "hello world".count(mock('x')) }.should raise_error(TypeError) + end +end diff --git a/spec/ruby/core/string/crypt_spec.rb b/spec/ruby/core/string/crypt_spec.rb new file mode 100644 index 0000000000..06f84c70a4 --- /dev/null +++ b/spec/ruby/core/string/crypt_spec.rb @@ -0,0 +1,92 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#crypt" do + platform_is :openbsd do + it "returns a cryptographic hash of self by applying the bcrypt algorithm with the specified salt" do + "mypassword".crypt("$2a$04$0WVaz0pV3jzfZ5G5tpmHWu").should == "$2a$04$0WVaz0pV3jzfZ5G5tpmHWuBQGbkjzgtSc3gJbmdy0GAGMa45MFM2." + + # Only uses first 72 characters of string + ("12345678"*9).crypt("$2a$04$0WVaz0pV3jzfZ5G5tpmHWu").should == "$2a$04$0WVaz0pV3jzfZ5G5tpmHWukj/ORBnsMjCGpST/zCJnAypc7eAbutK" + ("12345678"*10).crypt("$2a$04$0WVaz0pV3jzfZ5G5tpmHWu").should == "$2a$04$0WVaz0pV3jzfZ5G5tpmHWukj/ORBnsMjCGpST/zCJnAypc7eAbutK" + + # Only uses first 29 characters of salt + "mypassword".crypt("$2a$04$0WVaz0pV3jzfZ5G5tpmHWuB").should == "$2a$04$0WVaz0pV3jzfZ5G5tpmHWuBQGbkjzgtSc3gJbmdy0GAGMa45MFM2." 
+ end + + it "raises Errno::EINVAL when the salt is shorter than 29 characters" do + -> { "mypassword".crypt("$2a$04$0WVaz0pV3jzfZ5G5tpmHW") }.should raise_error(Errno::EINVAL) + end + + it "calls #to_str to converts the salt arg to a String" do + obj = mock('$2a$04$0WVaz0pV3jzfZ5G5tpmHWu') + obj.should_receive(:to_str).and_return("$2a$04$0WVaz0pV3jzfZ5G5tpmHWu") + + "mypassword".crypt(obj).should == "$2a$04$0WVaz0pV3jzfZ5G5tpmHWuBQGbkjzgtSc3gJbmdy0GAGMa45MFM2." + end + + it "doesn't return subclass instances" do + StringSpecs::MyString.new("mypassword").crypt("$2a$04$0WVaz0pV3jzfZ5G5tpmHWu").should be_an_instance_of(String) + "mypassword".crypt(StringSpecs::MyString.new("$2a$04$0WVaz0pV3jzfZ5G5tpmHWu")).should be_an_instance_of(String) + StringSpecs::MyString.new("mypassword").crypt(StringSpecs::MyString.new("$2a$04$0WVaz0pV3jzfZ5G5tpmHWu")).should be_an_instance_of(String) + end + end + + platform_is_not :openbsd do + # Note: MRI's documentation just says that the C stdlib function crypt() is + # called. + # + # I'm not sure if crypt() is guaranteed to produce the same result across + # different platforms. It seems that there is one standard UNIX implementation + # of crypt(), but that alternative implementations are possible. See + # http://www.unix.org.ua/orelly/networking/puis/ch08_06.htm + it "returns a cryptographic hash of self by applying the UNIX crypt algorithm with the specified salt" do + "".crypt("aa").should == "aaQSqAReePlq6" + "nutmeg".crypt("Mi").should == "MiqkFWCm1fNJI" + "ellen1".crypt("ri").should == "ri79kNd7V6.Sk" + "Sharon".crypt("./").should == "./UY9Q7TvYJDg" + "norahs".crypt("am").should == "amfIADT2iqjA." 
+ "norahs".crypt("7a").should == "7azfT5tIdyh0I" + + # Only uses first 8 chars of string + "01234567".crypt("aa").should == "aa4c4gpuvCkSE" + "012345678".crypt("aa").should == "aa4c4gpuvCkSE" + "0123456789".crypt("aa").should == "aa4c4gpuvCkSE" + + # Only uses first 2 chars of salt + "hello world".crypt("aa").should == "aayPz4hyPS1wI" + "hello world".crypt("aab").should == "aayPz4hyPS1wI" + "hello world".crypt("aabc").should == "aayPz4hyPS1wI" + end + + it "raises an ArgumentError when the string contains NUL character" do + -> { "poison\0null".crypt("aa") }.should raise_error(ArgumentError) + end + + it "calls #to_str to converts the salt arg to a String" do + obj = mock('aa') + obj.should_receive(:to_str).and_return("aa") + + "".crypt(obj).should == "aaQSqAReePlq6" + end + + it "doesn't return subclass instances" do + StringSpecs::MyString.new("hello").crypt("aa").should be_an_instance_of(String) + "hello".crypt(StringSpecs::MyString.new("aa")).should be_an_instance_of(String) + StringSpecs::MyString.new("hello").crypt(StringSpecs::MyString.new("aa")).should be_an_instance_of(String) + end + + it "raises an ArgumentError when the salt is shorter than two characters" do + -> { "hello".crypt("") }.should raise_error(ArgumentError) + -> { "hello".crypt("f") }.should raise_error(ArgumentError) + -> { "hello".crypt("\x00\x00") }.should raise_error(ArgumentError) + -> { "hello".crypt("\x00a") }.should raise_error(ArgumentError) + -> { "hello".crypt("a\x00") }.should raise_error(ArgumentError) + end + end + + it "raises a type error when the salt arg can't be converted to a string" do + -> { "".crypt(5) }.should raise_error(TypeError) + -> { "".crypt(mock('x')) }.should raise_error(TypeError) + end +end diff --git a/spec/ruby/core/string/dedup_spec.rb b/spec/ruby/core/string/dedup_spec.rb new file mode 100644 index 0000000000..2b31d80708 --- /dev/null +++ b/spec/ruby/core/string/dedup_spec.rb @@ -0,0 +1,6 @@ +require_relative '../../spec_helper' +require_relative 
'shared/dedup' + +describe 'String#dedup' do + it_behaves_like :string_dedup, :dedup +end diff --git a/spec/ruby/core/string/delete_prefix_spec.rb b/spec/ruby/core/string/delete_prefix_spec.rb new file mode 100644 index 0000000000..ee7f044905 --- /dev/null +++ b/spec/ruby/core/string/delete_prefix_spec.rb @@ -0,0 +1,83 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#delete_prefix" do + it "returns a copy of the string, with the given prefix removed" do + 'hello'.delete_prefix('hell').should == 'o' + 'hello'.delete_prefix('hello').should == '' + end + + it "returns a copy of the string, when the prefix isn't found" do + s = 'hello' + r = s.delete_prefix('hello!') + r.should_not equal s + r.should == s + r = s.delete_prefix('ell') + r.should_not equal s + r.should == s + r = s.delete_prefix('') + r.should_not equal s + r.should == s + end + + it "does not remove partial bytes, only full characters" do + "\xe3\x81\x82".delete_prefix("\xe3").should == "\xe3\x81\x82" + end + + it "doesn't set $~" do + $~ = nil + + 'hello'.delete_prefix('hell') + $~.should == nil + end + + it "calls to_str on its argument" do + o = mock('x') + o.should_receive(:to_str).and_return 'hell' + 'hello'.delete_prefix(o).should == 'o' + end + + it "returns a String instance when called on a subclass instance" do + s = StringSpecs::MyString.new('hello') + s.delete_prefix('hell').should be_an_instance_of(String) + end + + it "returns a String in the same encoding as self" do + 'hello'.encode("US-ASCII").delete_prefix('hell').encoding.should == Encoding::US_ASCII + end +end + +describe "String#delete_prefix!" 
do + it "removes the found prefix" do + s = 'hello' + s.delete_prefix!('hell').should equal(s) + s.should == 'o' + end + + it "returns nil if no change is made" do + s = 'hello' + s.delete_prefix!('ell').should == nil + s.delete_prefix!('').should == nil + end + + it "doesn't set $~" do + $~ = nil + + 'hello'.delete_prefix!('hell') + $~.should == nil + end + + it "calls to_str on its argument" do + o = mock('x') + o.should_receive(:to_str).and_return 'hell' + 'hello'.delete_prefix!(o).should == 'o' + end + + it "raises a FrozenError when self is frozen" do + -> { 'hello'.freeze.delete_prefix!('hell') }.should raise_error(FrozenError) + -> { 'hello'.freeze.delete_prefix!('') }.should raise_error(FrozenError) + -> { ''.freeze.delete_prefix!('') }.should raise_error(FrozenError) + end +end diff --git a/spec/ruby/core/string/delete_spec.rb b/spec/ruby/core/string/delete_spec.rb new file mode 100644 index 0000000000..6d359776e4 --- /dev/null +++ b/spec/ruby/core/string/delete_spec.rb @@ -0,0 +1,117 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#delete" do + it "returns a new string with the chars from the intersection of sets removed" do + s = "hello" + s.delete("lo").should == "he" + s.should == "hello" + + "hello".delete("l", "lo").should == "heo" + + "hell yeah".delete("").should == "hell yeah" + end + + it "raises an ArgumentError when given no arguments" do + -> { "hell yeah".delete }.should raise_error(ArgumentError) + end + + it "negates sets starting with ^" do + "hello".delete("aeiou", "^e").should == "hell" + "hello".delete("^leh").should == "hell" + "hello".delete("^o").should == "o" + "hello".delete("^").should == "hello" + "^_^".delete("^^").should == "^^" + "oa^_^o".delete("a^").should == "o_o" + end + + it "deletes all chars in a sequence" do + "hello".delete("ej-m").should == "ho" + "hello".delete("e-h").should == "llo" + 
"hel-lo".delete("e-").should == "hllo" + "hel-lo".delete("-h").should == "ello" + "hel-lo".delete("---").should == "hello" + "hel-012".delete("--2").should == "hel" + "hel-()".delete("(--").should == "hel" + "hello".delete("^e-h").should == "he" + "hello^".delete("^^-^").should == "^" + "hel--lo".delete("^---").should == "--" + + "abcdefgh".delete("a-ce-fh").should == "dg" + "abcdefgh".delete("he-fa-c").should == "dg" + "abcdefgh".delete("e-fha-c").should == "dg" + + "abcde".delete("ac-e").should == "b" + "abcde".delete("^ac-e").should == "acde" + + "ABCabc[]".delete("A-a").should == "bc" + end + + it "deletes multibyte characters" do + "四月".delete("月").should == "四" + '哥哥我倒'.delete('哥').should == "我倒" + end + + it "respects backslash for escaping a -" do + 'Non-Authoritative Information'.delete(' \-\'').should == + 'NonAuthoritativeInformation' + end + + it "raises if the given ranges are invalid" do + not_supported_on :opal do + xFF = [0xFF].pack('C') + range = "\x00 - #{xFF}".force_encoding('utf-8') + -> { "hello".delete(range).should == "" }.should raise_error(ArgumentError) + end + -> { "hello".delete("h-e") }.should raise_error(ArgumentError) + -> { "hello".delete("^h-e") }.should raise_error(ArgumentError) + end + + it "tries to convert each set arg to a string using to_str" do + other_string = mock('lo') + other_string.should_receive(:to_str).and_return("lo") + + other_string2 = mock('o') + other_string2.should_receive(:to_str).and_return("o") + + "hello world".delete(other_string, other_string2).should == "hell wrld" + end + + it "raises a TypeError when one set arg can't be converted to a string" do + -> { "hello world".delete(100) }.should raise_error(TypeError) + -> { "hello world".delete([]) }.should raise_error(TypeError) + -> { "hello world".delete(mock('x')) }.should raise_error(TypeError) + end + + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("oh no!!!").delete("!").should be_an_instance_of(String) + end + 
+ it "returns a String in the same encoding as self" do + "hello".encode("US-ASCII").delete("lo").encoding.should == Encoding::US_ASCII + end +end + +describe "String#delete!" do + it "modifies self in place and returns self" do + a = "hello" + a.delete!("aeiou", "^e").should equal(a) + a.should == "hell" + end + + it "returns nil if no modifications were made" do + a = "hello" + a.delete!("z").should == nil + a.should == "hello" + end + + it "raises a FrozenError when self is frozen" do + a = "hello" + a.freeze + + -> { a.delete!("") }.should raise_error(FrozenError) + -> { a.delete!("aeiou", "^e") }.should raise_error(FrozenError) + end +end diff --git a/spec/ruby/core/string/delete_suffix_spec.rb b/spec/ruby/core/string/delete_suffix_spec.rb new file mode 100644 index 0000000000..1842d75aa5 --- /dev/null +++ b/spec/ruby/core/string/delete_suffix_spec.rb @@ -0,0 +1,83 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#delete_suffix" do + it "returns a copy of the string, with the given suffix removed" do + 'hello'.delete_suffix('ello').should == 'h' + 'hello'.delete_suffix('hello').should == '' + end + + it "returns a copy of the string, when the suffix isn't found" do + s = 'hello' + r = s.delete_suffix('!hello') + r.should_not equal s + r.should == s + r = s.delete_suffix('ell') + r.should_not equal s + r.should == s + r = s.delete_suffix('') + r.should_not equal s + r.should == s + end + + it "does not remove partial bytes, only full characters" do + "\xe3\x81\x82".delete_suffix("\x82").should == "\xe3\x81\x82" + end + + it "doesn't set $~" do + $~ = nil + + 'hello'.delete_suffix('ello') + $~.should == nil + end + + it "calls to_str on its argument" do + o = mock('x') + o.should_receive(:to_str).and_return 'ello' + 'hello'.delete_suffix(o).should == 'h' + end + + it "returns a String instance when called on a subclass instance" do + s = 
StringSpecs::MyString.new('hello') + s.delete_suffix('ello').should be_an_instance_of(String) + end + + it "returns a String in the same encoding as self" do + "hello".encode("US-ASCII").delete_suffix("ello").encoding.should == Encoding::US_ASCII + end +end + +describe "String#delete_suffix!" do + it "removes the found prefix" do + s = 'hello' + s.delete_suffix!('ello').should equal(s) + s.should == 'h' + end + + it "returns nil if no change is made" do + s = 'hello' + s.delete_suffix!('ell').should == nil + s.delete_suffix!('').should == nil + end + + it "doesn't set $~" do + $~ = nil + + 'hello'.delete_suffix!('ello') + $~.should == nil + end + + it "calls to_str on its argument" do + o = mock('x') + o.should_receive(:to_str).and_return 'ello' + 'hello'.delete_suffix!(o).should == 'h' + end + + it "raises a FrozenError when self is frozen" do + -> { 'hello'.freeze.delete_suffix!('ello') }.should raise_error(FrozenError) + -> { 'hello'.freeze.delete_suffix!('') }.should raise_error(FrozenError) + -> { ''.freeze.delete_suffix!('') }.should raise_error(FrozenError) + end +end diff --git a/spec/ruby/core/string/downcase_spec.rb b/spec/ruby/core/string/downcase_spec.rb new file mode 100644 index 0000000000..2d260f23f1 --- /dev/null +++ b/spec/ruby/core/string/downcase_spec.rb @@ -0,0 +1,195 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#downcase" do + it "returns a copy of self with all uppercase letters downcased" do + "hELLO".downcase.should == "hello" + "hello".downcase.should == "hello" + end + + it "returns a String in the same encoding as self" do + "hELLO".encode("US-ASCII").downcase.encoding.should == Encoding::US_ASCII + end + + describe "full Unicode case mapping" do + it "works for all of Unicode with no option" do + "ÄÖÜ".downcase.should == "äöü" + end + + it "updates string metadata" do + downcased = "\u{212A}ING".downcase + + downcased.should == 
"king" + downcased.size.should == 4 + downcased.bytesize.should == 4 + downcased.ascii_only?.should be_true + end + end + + describe "ASCII-only case mapping" do + it "does not downcase non-ASCII characters" do + "CÅR".downcase(:ascii).should == "cÅr" + end + + it "works with substrings" do + "prefix TÉ"[-2..-1].downcase(:ascii).should == "tÉ" + end + end + + describe "full Unicode case mapping adapted for Turkic languages" do + it "downcases characters according to Turkic semantics" do + "İ".downcase(:turkic).should == "i" + end + + it "allows Lithuanian as an extra option" do + "İ".downcase(:turkic, :lithuanian).should == "i" + end + + it "does not allow any other additional option" do + -> { "İ".downcase(:turkic, :ascii) }.should raise_error(ArgumentError) + end + end + + describe "full Unicode case mapping adapted for Lithuanian" do + it "currently works the same as full Unicode case mapping" do + "İS".downcase(:lithuanian).should == "i\u{307}s" + end + + it "allows Turkic as an extra option (and applies Turkic semantics)" do + "İS".downcase(:lithuanian, :turkic).should == "is" + end + + it "does not allow any other additional option" do + -> { "İS".downcase(:lithuanian, :ascii) }.should raise_error(ArgumentError) + end + end + + describe "case folding" do + it "case folds special characters" do + "ß".downcase.should == "ß" + "ß".downcase(:fold).should == "ss" + end + end + + it "does not allow invalid options" do + -> { "ABC".downcase(:invalid_option) }.should raise_error(ArgumentError) + end + + it "returns a String instance for subclasses" do + StringSpecs::MyString.new("FOObar").downcase.should be_an_instance_of(String) + end +end + +describe "String#downcase!" do + it "modifies self in place" do + a = "HeLlO" + a.downcase!.should equal(a) + a.should == "hello" + end + + it "modifies self in place for non-ascii-compatible encodings" do + a = "HeLlO".encode("utf-16le") + a.downcase! 
+ a.should == "hello".encode("utf-16le") + end + + describe "full Unicode case mapping" do + it "modifies self in place for all of Unicode with no option" do + a = "ÄÖÜ" + a.downcase! + a.should == "äöü" + end + + it "updates string metadata" do + downcased = "\u{212A}ING" + downcased.downcase! + + downcased.should == "king" + downcased.size.should == 4 + downcased.bytesize.should == 4 + downcased.ascii_only?.should be_true + end + end + + describe "ASCII-only case mapping" do + it "does not downcase non-ASCII characters" do + a = "CÅR" + a.downcase!(:ascii) + a.should == "cÅr" + end + + it "works for non-ascii-compatible encodings" do + a = "ABC".encode("utf-16le") + a.downcase!(:ascii) + a.should == "abc".encode("utf-16le") + end + end + + describe "full Unicode case mapping adapted for Turkic languages" do + it "downcases characters according to Turkic semantics" do + a = "İ" + a.downcase!(:turkic) + a.should == "i" + end + + it "allows Lithuanian as an extra option" do + a = "İ" + a.downcase!(:turkic, :lithuanian) + a.should == "i" + end + + it "does not allow any other additional option" do + -> { a = "İ"; a.downcase!(:turkic, :ascii) }.should raise_error(ArgumentError) + end + end + + describe "full Unicode case mapping adapted for Lithuanian" do + it "currently works the same as full Unicode case mapping" do + a = "İS" + a.downcase!(:lithuanian) + a.should == "i\u{307}s" + end + + it "allows Turkic as an extra option (and applies Turkic semantics)" do + a = "İS" + a.downcase!(:lithuanian, :turkic) + a.should == "is" + end + + it "does not allow any other additional option" do + -> { a = "İS"; a.downcase!(:lithuanian, :ascii) }.should raise_error(ArgumentError) + end + end + + describe "case folding" do + it "case folds special characters" do + a = "ß" + a.downcase! 
+ a.should == "ß" + + a.downcase!(:fold) + a.should == "ss" + end + end + + it "does not allow invalid options" do + -> { a = "ABC"; a.downcase!(:invalid_option) }.should raise_error(ArgumentError) + end + + it "returns nil if no modifications were made" do + a = "hello" + a.downcase!.should == nil + a.should == "hello" + end + + it "raises a FrozenError when self is frozen" do + -> { "HeLlo".freeze.downcase! }.should raise_error(FrozenError) + -> { "hello".freeze.downcase! }.should raise_error(FrozenError) + end + + it "sets the result String encoding to the source String encoding" do + "ABC".downcase.encoding.should equal(Encoding::UTF_8) + end +end diff --git a/spec/ruby/core/string/dump_spec.rb b/spec/ruby/core/string/dump_spec.rb new file mode 100644 index 0000000000..cab8beff5a --- /dev/null +++ b/spec/ruby/core/string/dump_spec.rb @@ -0,0 +1,396 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#dump" do + it "does not take into account if a string is frozen" do + "foo".freeze.dump.should_not.frozen? 
+ end + + it "returns a String instance" do + StringSpecs::MyString.new.dump.should be_an_instance_of(String) + end + + it "wraps string with \"" do + "foo".dump.should == '"foo"' + end + + it "returns a string with special characters replaced with \\<char> notation" do + [ ["\a", '"\\a"'], + ["\b", '"\\b"'], + ["\t", '"\\t"'], + ["\n", '"\\n"'], + ["\v", '"\\v"'], + ["\f", '"\\f"'], + ["\r", '"\\r"'], + ["\e", '"\\e"'] + ].should be_computed_by(:dump) + end + + it "returns a string with \" and \\ escaped with a backslash" do + [ ["\"", '"\\""'], + ["\\", '"\\\\"'] + ].should be_computed_by(:dump) + end + + it "returns a string with \\#<char> when # is followed by $, @, @@, {" do + [ ["\#$PATH", '"\\#$PATH"'], + ["\#@a", '"\\#@a"'], + ["\#@@a", '"\\#@@a"'], + ["\#{a}", '"\\#{a}"'] + ].should be_computed_by(:dump) + end + + it "returns a string with # not escaped when followed by any other character" do + [ ["#", '"#"'], + ["#1", '"#1"'] + ].should be_computed_by(:dump) + end + + it "returns a string with printable non-alphanumeric characters unescaped" do + [ [" ", '" "'], + ["!", '"!"'], + ["$", '"$"'], + ["%", '"%"'], + ["&", '"&"'], + ["'", '"\'"'], + ["(", '"("'], + [")", '")"'], + ["*", '"*"'], + ["+", '"+"'], + [",", '","'], + ["-", '"-"'], + [".", '"."'], + ["/", '"/"'], + [":", '":"'], + [";", '";"'], + ["<", '"<"'], + ["=", '"="'], + [">", '">"'], + ["?", '"?"'], + ["@", '"@"'], + ["[", '"["'], + ["]", '"]"'], + ["^", '"^"'], + ["_", '"_"'], + ["`", '"`"'], + ["{", '"{"'], + ["|", '"|"'], + ["}", '"}"'], + ["~", '"~"'] + ].should be_computed_by(:dump) + end + + it "returns a string with numeric characters unescaped" do + [ ["0", '"0"'], + ["1", '"1"'], + ["2", '"2"'], + ["3", '"3"'], + ["4", '"4"'], + ["5", '"5"'], + ["6", '"6"'], + ["7", '"7"'], + ["8", '"8"'], + ["9", '"9"'], + ].should be_computed_by(:dump) + end + + it "returns a string with upper-case alpha characters unescaped" do + [ ["A", '"A"'], + ["B", '"B"'], + ["C", '"C"'], + ["D", '"D"'], + 
["E", '"E"'], + ["F", '"F"'], + ["G", '"G"'], + ["H", '"H"'], + ["I", '"I"'], + ["J", '"J"'], + ["K", '"K"'], + ["L", '"L"'], + ["M", '"M"'], + ["N", '"N"'], + ["O", '"O"'], + ["P", '"P"'], + ["Q", '"Q"'], + ["R", '"R"'], + ["S", '"S"'], + ["T", '"T"'], + ["U", '"U"'], + ["V", '"V"'], + ["W", '"W"'], + ["X", '"X"'], + ["Y", '"Y"'], + ["Z", '"Z"'] + ].should be_computed_by(:dump) + end + + it "returns a string with lower-case alpha characters unescaped" do + [ ["a", '"a"'], + ["b", '"b"'], + ["c", '"c"'], + ["d", '"d"'], + ["e", '"e"'], + ["f", '"f"'], + ["g", '"g"'], + ["h", '"h"'], + ["i", '"i"'], + ["j", '"j"'], + ["k", '"k"'], + ["l", '"l"'], + ["m", '"m"'], + ["n", '"n"'], + ["o", '"o"'], + ["p", '"p"'], + ["q", '"q"'], + ["r", '"r"'], + ["s", '"s"'], + ["t", '"t"'], + ["u", '"u"'], + ["v", '"v"'], + ["w", '"w"'], + ["x", '"x"'], + ["y", '"y"'], + ["z", '"z"'] + ].should be_computed_by(:dump) + end + + it "returns a string with non-printing ASCII characters replaced by \\x notation" do + # Avoid the file encoding by computing the string with #chr. 
+ [ [0000.chr, '"\\x00"'], + [0001.chr, '"\\x01"'], + [0002.chr, '"\\x02"'], + [0003.chr, '"\\x03"'], + [0004.chr, '"\\x04"'], + [0005.chr, '"\\x05"'], + [0006.chr, '"\\x06"'], + [0016.chr, '"\\x0E"'], + [0017.chr, '"\\x0F"'], + [0020.chr, '"\\x10"'], + [0021.chr, '"\\x11"'], + [0022.chr, '"\\x12"'], + [0023.chr, '"\\x13"'], + [0024.chr, '"\\x14"'], + [0025.chr, '"\\x15"'], + [0026.chr, '"\\x16"'], + [0027.chr, '"\\x17"'], + [0030.chr, '"\\x18"'], + [0031.chr, '"\\x19"'], + [0032.chr, '"\\x1A"'], + [0034.chr, '"\\x1C"'], + [0035.chr, '"\\x1D"'], + [0036.chr, '"\\x1E"'], + [0037.chr, '"\\x1F"'], + [0177.chr, '"\\x7F"'], + [0200.chr, '"\\x80"'], + [0201.chr, '"\\x81"'], + [0202.chr, '"\\x82"'], + [0203.chr, '"\\x83"'], + [0204.chr, '"\\x84"'], + [0205.chr, '"\\x85"'], + [0206.chr, '"\\x86"'], + [0207.chr, '"\\x87"'], + [0210.chr, '"\\x88"'], + [0211.chr, '"\\x89"'], + [0212.chr, '"\\x8A"'], + [0213.chr, '"\\x8B"'], + [0214.chr, '"\\x8C"'], + [0215.chr, '"\\x8D"'], + [0216.chr, '"\\x8E"'], + [0217.chr, '"\\x8F"'], + [0220.chr, '"\\x90"'], + [0221.chr, '"\\x91"'], + [0222.chr, '"\\x92"'], + [0223.chr, '"\\x93"'], + [0224.chr, '"\\x94"'], + [0225.chr, '"\\x95"'], + [0226.chr, '"\\x96"'], + [0227.chr, '"\\x97"'], + [0230.chr, '"\\x98"'], + [0231.chr, '"\\x99"'], + [0232.chr, '"\\x9A"'], + [0233.chr, '"\\x9B"'], + [0234.chr, '"\\x9C"'], + [0235.chr, '"\\x9D"'], + [0236.chr, '"\\x9E"'], + [0237.chr, '"\\x9F"'], + [0240.chr, '"\\xA0"'], + [0241.chr, '"\\xA1"'], + [0242.chr, '"\\xA2"'], + [0243.chr, '"\\xA3"'], + [0244.chr, '"\\xA4"'], + [0245.chr, '"\\xA5"'], + [0246.chr, '"\\xA6"'], + [0247.chr, '"\\xA7"'], + [0250.chr, '"\\xA8"'], + [0251.chr, '"\\xA9"'], + [0252.chr, '"\\xAA"'], + [0253.chr, '"\\xAB"'], + [0254.chr, '"\\xAC"'], + [0255.chr, '"\\xAD"'], + [0256.chr, '"\\xAE"'], + [0257.chr, '"\\xAF"'], + [0260.chr, '"\\xB0"'], + [0261.chr, '"\\xB1"'], + [0262.chr, '"\\xB2"'], + [0263.chr, '"\\xB3"'], + [0264.chr, '"\\xB4"'], + [0265.chr, '"\\xB5"'], + [0266.chr, 
'"\\xB6"'], + [0267.chr, '"\\xB7"'], + [0270.chr, '"\\xB8"'], + [0271.chr, '"\\xB9"'], + [0272.chr, '"\\xBA"'], + [0273.chr, '"\\xBB"'], + [0274.chr, '"\\xBC"'], + [0275.chr, '"\\xBD"'], + [0276.chr, '"\\xBE"'], + [0277.chr, '"\\xBF"'], + [0300.chr, '"\\xC0"'], + [0301.chr, '"\\xC1"'], + [0302.chr, '"\\xC2"'], + [0303.chr, '"\\xC3"'], + [0304.chr, '"\\xC4"'], + [0305.chr, '"\\xC5"'], + [0306.chr, '"\\xC6"'], + [0307.chr, '"\\xC7"'], + [0310.chr, '"\\xC8"'], + [0311.chr, '"\\xC9"'], + [0312.chr, '"\\xCA"'], + [0313.chr, '"\\xCB"'], + [0314.chr, '"\\xCC"'], + [0315.chr, '"\\xCD"'], + [0316.chr, '"\\xCE"'], + [0317.chr, '"\\xCF"'], + [0320.chr, '"\\xD0"'], + [0321.chr, '"\\xD1"'], + [0322.chr, '"\\xD2"'], + [0323.chr, '"\\xD3"'], + [0324.chr, '"\\xD4"'], + [0325.chr, '"\\xD5"'], + [0326.chr, '"\\xD6"'], + [0327.chr, '"\\xD7"'], + [0330.chr, '"\\xD8"'], + [0331.chr, '"\\xD9"'], + [0332.chr, '"\\xDA"'], + [0333.chr, '"\\xDB"'], + [0334.chr, '"\\xDC"'], + [0335.chr, '"\\xDD"'], + [0336.chr, '"\\xDE"'], + [0337.chr, '"\\xDF"'], + [0340.chr, '"\\xE0"'], + [0341.chr, '"\\xE1"'], + [0342.chr, '"\\xE2"'], + [0343.chr, '"\\xE3"'], + [0344.chr, '"\\xE4"'], + [0345.chr, '"\\xE5"'], + [0346.chr, '"\\xE6"'], + [0347.chr, '"\\xE7"'], + [0350.chr, '"\\xE8"'], + [0351.chr, '"\\xE9"'], + [0352.chr, '"\\xEA"'], + [0353.chr, '"\\xEB"'], + [0354.chr, '"\\xEC"'], + [0355.chr, '"\\xED"'], + [0356.chr, '"\\xEE"'], + [0357.chr, '"\\xEF"'], + [0360.chr, '"\\xF0"'], + [0361.chr, '"\\xF1"'], + [0362.chr, '"\\xF2"'], + [0363.chr, '"\\xF3"'], + [0364.chr, '"\\xF4"'], + [0365.chr, '"\\xF5"'], + [0366.chr, '"\\xF6"'], + [0367.chr, '"\\xF7"'], + [0370.chr, '"\\xF8"'], + [0371.chr, '"\\xF9"'], + [0372.chr, '"\\xFA"'], + [0373.chr, '"\\xFB"'], + [0374.chr, '"\\xFC"'], + [0375.chr, '"\\xFD"'], + [0376.chr, '"\\xFE"'], + [0377.chr, '"\\xFF"'] + ].should be_computed_by(:dump) + end + + it "returns a string with non-printing single-byte UTF-8 characters replaced by \\x notation" do + [ [0000.chr('utf-8'), 
'"\x00"'], + [0001.chr('utf-8'), '"\x01"'], + [0002.chr('utf-8'), '"\x02"'], + [0003.chr('utf-8'), '"\x03"'], + [0004.chr('utf-8'), '"\x04"'], + [0005.chr('utf-8'), '"\x05"'], + [0006.chr('utf-8'), '"\x06"'], + [0016.chr('utf-8'), '"\x0E"'], + [0017.chr('utf-8'), '"\x0F"'], + [0020.chr('utf-8'), '"\x10"'], + [0021.chr('utf-8'), '"\x11"'], + [0022.chr('utf-8'), '"\x12"'], + [0023.chr('utf-8'), '"\x13"'], + [0024.chr('utf-8'), '"\x14"'], + [0025.chr('utf-8'), '"\x15"'], + [0026.chr('utf-8'), '"\x16"'], + [0027.chr('utf-8'), '"\x17"'], + [0030.chr('utf-8'), '"\x18"'], + [0031.chr('utf-8'), '"\x19"'], + [0032.chr('utf-8'), '"\x1A"'], + [0034.chr('utf-8'), '"\x1C"'], + [0035.chr('utf-8'), '"\x1D"'], + [0036.chr('utf-8'), '"\x1E"'], + [0037.chr('utf-8'), '"\x1F"'], + [0177.chr('utf-8'), '"\x7F"'] + ].should be_computed_by(:dump) + end + + it "returns a string with multi-byte UTF-8 characters less than or equal 0xFFFF replaced by \\uXXXX notation with upper-case hex digits" do + [ [0200.chr('utf-8'), '"\u0080"'], + [0201.chr('utf-8'), '"\u0081"'], + [0202.chr('utf-8'), '"\u0082"'], + [0203.chr('utf-8'), '"\u0083"'], + [0204.chr('utf-8'), '"\u0084"'], + [0206.chr('utf-8'), '"\u0086"'], + [0207.chr('utf-8'), '"\u0087"'], + [0210.chr('utf-8'), '"\u0088"'], + [0211.chr('utf-8'), '"\u0089"'], + [0212.chr('utf-8'), '"\u008A"'], + [0213.chr('utf-8'), '"\u008B"'], + [0214.chr('utf-8'), '"\u008C"'], + [0215.chr('utf-8'), '"\u008D"'], + [0216.chr('utf-8'), '"\u008E"'], + [0217.chr('utf-8'), '"\u008F"'], + [0220.chr('utf-8'), '"\u0090"'], + [0221.chr('utf-8'), '"\u0091"'], + [0222.chr('utf-8'), '"\u0092"'], + [0223.chr('utf-8'), '"\u0093"'], + [0224.chr('utf-8'), '"\u0094"'], + [0225.chr('utf-8'), '"\u0095"'], + [0226.chr('utf-8'), '"\u0096"'], + [0227.chr('utf-8'), '"\u0097"'], + [0230.chr('utf-8'), '"\u0098"'], + [0231.chr('utf-8'), '"\u0099"'], + [0232.chr('utf-8'), '"\u009A"'], + [0233.chr('utf-8'), '"\u009B"'], + [0234.chr('utf-8'), '"\u009C"'], + [0235.chr('utf-8'), 
'"\u009D"'], + [0236.chr('utf-8'), '"\u009E"'], + [0237.chr('utf-8'), '"\u009F"'], + [0177777.chr('utf-8'), '"\uFFFF"'], + ].should be_computed_by(:dump) + end + + it "returns a string with multi-byte UTF-8 characters greater than 0xFFFF replaced by \\u{XXXXXX} notation with upper-case hex digits" do + 0x10000.chr('utf-8').dump.should == '"\u{10000}"' + 0x10FFFF.chr('utf-8').dump.should == '"\u{10FFFF}"' + end + + it "includes .force_encoding(name) if the encoding isn't ASCII compatible" do + "\u{876}".encode('utf-16be').dump.should.end_with?(".force_encoding(\"UTF-16BE\")") + "\u{876}".encode('utf-16le').dump.should.end_with?(".force_encoding(\"UTF-16LE\")") + end + + it "returns a String in the same encoding as self" do + "foo".encode("ISO-8859-1").dump.encoding.should == Encoding::ISO_8859_1 + "foo".encode('windows-1251').dump.encoding.should == Encoding::Windows_1251 + 1.chr.dump.encoding.should == Encoding::US_ASCII + end +end diff --git a/spec/ruby/core/string/dup_spec.rb b/spec/ruby/core/string/dup_spec.rb new file mode 100644 index 0000000000..073802d84b --- /dev/null +++ b/spec/ruby/core/string/dup_spec.rb @@ -0,0 +1,65 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#dup" do + before :each do + ScratchPad.clear + @obj = StringSpecs::InitializeString.new "string" + end + + it "calls #initialize_copy on the new instance" do + dup = @obj.dup + ScratchPad.recorded.should_not == @obj.object_id + ScratchPad.recorded.should == dup.object_id + end + + it "copies instance variables" do + dup = @obj.dup + dup.ivar.should == 1 + end + + it "does not copy singleton methods" do + def @obj.special() :the_one end + dup = @obj.dup + -> { dup.special }.should raise_error(NameError) + end + + it "does not copy modules included in the singleton class" do + class << @obj + include StringSpecs::StringModule + end + + dup = @obj.dup + -> { dup.repr }.should raise_error(NameError) + end + + it "does not copy constants defined in 
the singleton class" do + class << @obj + CLONE = :clone + end + + dup = @obj.dup + -> { class << dup; CLONE; end }.should raise_error(NameError) + end + + it "does not modify the original string when changing dupped string" do + orig = "string"[0..100] + dup = orig.dup + orig[0] = 'x' + orig.should == "xtring" + dup.should == "string" + end + + it "does not modify the original setbyte-mutated string when changing dupped string" do + orig = +"a" + orig.setbyte 0, "b".ord + copy = orig.dup + orig.setbyte 0, "c".ord + orig.should == "c" + copy.should == "b" + end + + it "returns a String in the same encoding as self" do + "hello".encode("US-ASCII").dup.encoding.should == Encoding::US_ASCII + end +end diff --git a/spec/ruby/core/string/each_byte_spec.rb b/spec/ruby/core/string/each_byte_spec.rb new file mode 100644 index 0000000000..7b3db265ac --- /dev/null +++ b/spec/ruby/core/string/each_byte_spec.rb @@ -0,0 +1,61 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#each_byte" do + it "passes each byte in self to the given block" do + a = [] + "hello\x00".each_byte { |c| a << c } + a.should == [104, 101, 108, 108, 111, 0] + end + + it "keeps iterating from the old position (to new string end) when self changes" do + r = +"" + s = +"hello world" + s.each_byte do |c| + r << c + s.insert(0, "<>") if r.size < 3 + end + r.should == "h><>hello world" + + r = +"" + s = +"hello world" + s.each_byte { |c| s.slice!(-1); r << c } + r.should == "hello " + + r = +"" + s = +"hello world" + s.each_byte { |c| s.slice!(0); r << c } + r.should == "hlowrd" + + r = +"" + s = +"hello world" + s.each_byte { |c| s.slice!(0..-1); r << c } + r.should == "h" + end + + it "returns self" do + s = "hello" + (s.each_byte {}).should equal(s) + end + + describe "when no block is given" do + it "returns an enumerator" do + enum = "hello".each_byte + enum.should be_an_instance_of(Enumerator) + enum.to_a.should == [104, 101, 108, 108, 111] + end + + 
describe "returned enumerator" do + describe "size" do + it "should return the bytesize of the string" do + str = "hello" + str.each_byte.size.should == str.bytesize + str = "ola" + str.each_byte.size.should == str.bytesize + str = "\303\207\342\210\202\303\251\306\222g" + str.each_byte.size.should == str.bytesize + end + end + end + end +end diff --git a/spec/ruby/core/string/each_char_spec.rb b/spec/ruby/core/string/each_char_spec.rb new file mode 100644 index 0000000000..36219f79db --- /dev/null +++ b/spec/ruby/core/string/each_char_spec.rb @@ -0,0 +1,8 @@ +require_relative "../../spec_helper" +require_relative 'shared/chars' +require_relative 'shared/each_char_without_block' + +describe "String#each_char" do + it_behaves_like :string_chars, :each_char + it_behaves_like :string_each_char_without_block, :each_char +end diff --git a/spec/ruby/core/string/each_codepoint_spec.rb b/spec/ruby/core/string/each_codepoint_spec.rb new file mode 100644 index 0000000000..c11cb1beae --- /dev/null +++ b/spec/ruby/core/string/each_codepoint_spec.rb @@ -0,0 +1,8 @@ +require_relative '../../spec_helper' +require_relative 'shared/codepoints' +require_relative 'shared/each_codepoint_without_block' + +describe "String#each_codepoint" do + it_behaves_like :string_codepoints, :each_codepoint + it_behaves_like :string_each_codepoint_without_block, :each_codepoint +end diff --git a/spec/ruby/core/string/each_grapheme_cluster_spec.rb b/spec/ruby/core/string/each_grapheme_cluster_spec.rb new file mode 100644 index 0000000000..e1fa4ae67b --- /dev/null +++ b/spec/ruby/core/string/each_grapheme_cluster_spec.rb @@ -0,0 +1,16 @@ +require_relative "../../spec_helper" +require_relative 'shared/chars' +require_relative 'shared/grapheme_clusters' +require_relative 'shared/each_char_without_block' + +describe "String#each_grapheme_cluster" do + it_behaves_like :string_chars, :each_grapheme_cluster + it_behaves_like :string_grapheme_clusters, :each_grapheme_cluster + it_behaves_like 
:string_each_char_without_block, :each_grapheme_cluster + + it "yields String instances for subclasses" do + a = [] + StringSpecs::MyString.new("abc").each_grapheme_cluster { |s| a << s.class } + a.should == [String, String, String] + end +end diff --git a/spec/ruby/core/string/each_line_spec.rb b/spec/ruby/core/string/each_line_spec.rb new file mode 100644 index 0000000000..90fc920bf1 --- /dev/null +++ b/spec/ruby/core/string/each_line_spec.rb @@ -0,0 +1,9 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/each_line' +require_relative 'shared/each_line_without_block' + +describe "String#each_line" do + it_behaves_like :string_each_line, :each_line + it_behaves_like :string_each_line_without_block, :each_line +end diff --git a/spec/ruby/core/string/element_reference_spec.rb b/spec/ruby/core/string/element_reference_spec.rb new file mode 100644 index 0000000000..f6e1750c93 --- /dev/null +++ b/spec/ruby/core/string/element_reference_spec.rb @@ -0,0 +1,35 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/slice' + +describe "String#[]" do + it_behaves_like :string_slice, :[] +end + +describe "String#[] with index, length" do + it_behaves_like :string_slice_index_length, :[] +end + +describe "String#[] with Range" do + it_behaves_like :string_slice_range, :[] +end + +describe "String#[] with Regexp" do + it_behaves_like :string_slice_regexp, :[] +end + +describe "String#[] with Regexp, index" do + it_behaves_like :string_slice_regexp_index, :[] +end + +describe "String#[] with Regexp, group" do + it_behaves_like :string_slice_regexp_group, :[] +end + +describe "String#[] with String" do + it_behaves_like :string_slice_string, :[] +end + +describe "String#[] with Symbol" do + it_behaves_like :string_slice_symbol, :[] +end diff --git a/spec/ruby/core/string/element_set_spec.rb b/spec/ruby/core/string/element_set_spec.rb new file mode 100644 index 
0000000000..e7599f832c --- /dev/null +++ b/spec/ruby/core/string/element_set_spec.rb @@ -0,0 +1,589 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +# TODO: Add missing String#[]= specs: +# String#[re, idx] = obj + +describe "String#[]= with Integer index" do + it "replaces the char at idx with other_str" do + a = "hello" + a[0] = "bam" + a.should == "bamello" + a[-2] = "" + a.should == "bamelo" + end + + it "raises an IndexError without changing self if idx is outside of self" do + str = "hello" + + -> { str[20] = "bam" }.should raise_error(IndexError) + str.should == "hello" + + -> { str[-20] = "bam" }.should raise_error(IndexError) + str.should == "hello" + + -> { ""[-1] = "bam" }.should raise_error(IndexError) + end + + # Behaviour is verified by matz in + # http://redmine.ruby-lang.org/issues/show/1750 + it "allows assignment to the zero'th element of an empty String" do + str = "" + str[0] = "bam" + str.should == "bam" + end + + it "raises IndexError if the string index doesn't match a position in the string" do + str = "hello" + -> { str['y'] = "bam" }.should raise_error(IndexError) + str.should == "hello" + end + + it "raises a FrozenError when self is frozen" do + a = "hello" + a.freeze + + -> { a[0] = "bam" }.should raise_error(FrozenError) + end + + it "calls to_int on index" do + str = "hello" + str[0.5] = "hi " + str.should == "hi ello" + + obj = mock('-1') + obj.should_receive(:to_int).and_return(-1) + str[obj] = "!" + str.should == "hi ell!" 
+ end + + it "calls #to_str to convert other to a String" do + other_str = mock('-test-') + other_str.should_receive(:to_str).and_return("-test-") + + a = "abc" + a[1] = other_str + a.should == "a-test-c" + end + + it "raises a TypeError if other_str can't be converted to a String" do + -> { "test"[1] = [] }.should raise_error(TypeError) + -> { "test"[1] = mock('x') }.should raise_error(TypeError) + -> { "test"[1] = nil }.should raise_error(TypeError) + end + + it "raises a TypeError if passed an Integer replacement" do + -> { "abc"[1] = 65 }.should raise_error(TypeError) + end + + it "raises an IndexError if the index is greater than character size" do + -> { "あれ"[4] = "a" }.should raise_error(IndexError) + end + + it "calls #to_int to convert the index" do + index = mock("string element set") + index.should_receive(:to_int).and_return(1) + + str = "あれ" + str[index] = "a" + str.should == "あa" + end + + it "raises a TypeError if #to_int does not return an Integer" do + index = mock("string element set") + index.should_receive(:to_int).and_return('1') + + -> { "abc"[index] = "d" }.should raise_error(TypeError) + end + + it "raises an IndexError if #to_int returns a value out of range" do + index = mock("string element set") + index.should_receive(:to_int).and_return(4) + + -> { "ab"[index] = "c" }.should raise_error(IndexError) + end + + it "replaces a character with a multibyte character" do + str = "ありがとu" + str[4] = "う" + str.should == "ありがとう" + end + + it "replaces a multibyte character with a character" do + str = "ありがとう" + str[4] = "u" + str.should == "ありがとu" + end + + it "replaces a multibyte character with a multibyte character" do + str = "ありがとお" + str[4] = "う" + str.should == "ありがとう" + end + + it "encodes the String in an encoding compatible with the replacement" do + str = " ".force_encoding Encoding::US_ASCII + rep = [160].pack('C').force_encoding Encoding::BINARY + str[0] = rep + str.encoding.should equal(Encoding::BINARY) + end + + it "updates the 
string to a compatible encoding" do + str = " " + str[1] = [0xB9].pack("C*") + str.encoding.should == Encoding::ASCII_8BIT + end + + it "raises an Encoding::CompatibilityError if the replacement encoding is incompatible" do + str = "あれ" + rep = "が".encode Encoding::EUC_JP + -> { str[0] = rep }.should raise_error(Encoding::CompatibilityError) + end +end + +describe "String#[]= with String index" do + it "replaces fewer characters with more characters" do + str = "abcde" + str["cd"] = "ghi" + str.should == "abghie" + end + + it "replaces more characters with fewer characters" do + str = "abcde" + str["bcd"] = "f" + str.should == "afe" + end + + it "replaces characters with no characters" do + str = "abcde" + str["cd"] = "" + str.should == "abe" + end + + it "raises an IndexError if the search String is not found" do + str = "abcde" + -> { str["g"] = "h" }.should raise_error(IndexError) + end + + it "replaces characters with a multibyte character" do + str = "ありgaとう" + str["ga"] = "が" + str.should == "ありがとう" + end + + it "replaces multibyte characters with characters" do + str = "ありがとう" + str["が"] = "ga" + str.should == "ありgaとう" + end + + it "replaces multibyte characters with multibyte characters" do + str = "ありがとう" + str["が"] = "か" + str.should == "ありかとう" + end + + it "encodes the String in an encoding compatible with the replacement" do + str = " ".force_encoding Encoding::US_ASCII + rep = [160].pack('C').force_encoding Encoding::BINARY + str[" "] = rep + str.encoding.should equal(Encoding::BINARY) + end + + it "raises an Encoding::CompatibilityError if the replacement encoding is incompatible" do + str = "あれ" + rep = "が".encode Encoding::EUC_JP + -> { str["れ"] = rep }.should raise_error(Encoding::CompatibilityError) + end +end + +describe "String#[]= with a Regexp index" do + it "replaces the matched text with the rhs" do + str = "hello" + str[/lo/] = "x" + str.should == "helx" + end + + it "raises IndexError if the regexp index doesn't match a position in the 
string" do + str = "hello" + -> { str[/y/] = "bam" }.should raise_error(IndexError) + str.should == "hello" + end + + it "calls #to_str to convert the replacement" do + rep = mock("string element set regexp") + rep.should_receive(:to_str).and_return("b") + + str = "abc" + str[/ab/] = rep + str.should == "bc" + end + + it "checks the match before calling #to_str to convert the replacement" do + rep = mock("string element set regexp") + rep.should_not_receive(:to_str) + + -> { "abc"[/def/] = rep }.should raise_error(IndexError) + end + + describe "with 3 arguments" do + it "calls #to_int to convert the second object" do + ref = mock("string element set regexp ref") + ref.should_receive(:to_int).and_return(1) + + str = "abc" + str[/a(b)/, ref] = "x" + str.should == "axc" + end + + it "raises a TypeError if #to_int does not return an Integer" do + ref = mock("string element set regexp ref") + ref.should_receive(:to_int).and_return(nil) + + -> { "abc"[/a(b)/, ref] = "x" }.should raise_error(TypeError) + end + + it "uses the 2nd of 3 arguments as which capture should be replaced" do + str = "aaa bbb ccc" + str[/a (bbb) c/, 1] = "ddd" + str.should == "aaa ddd ccc" + end + + it "allows the specified capture to be negative and count from the end" do + str = "abcd" + str[/(a)(b)(c)(d)/, -2] = "e" + str.should == "abed" + end + + it "checks the match index before calling #to_str to convert the replacement" do + rep = mock("string element set regexp") + rep.should_not_receive(:to_str) + + -> { "abc"[/a(b)/, 2] = rep }.should raise_error(IndexError) + end + + it "raises IndexError if the specified capture isn't available" do + str = "aaa bbb ccc" + -> { str[/a (bbb) c/, 2] = "ddd" }.should raise_error(IndexError) + -> { str[/a (bbb) c/, -2] = "ddd" }.should raise_error(IndexError) + end + + describe "when the optional capture does not match" do + it "raises an IndexError before setting the replacement" do + str1 = "a b c" + str2 = str1.dup + -> { str2[/a (b) (Z)?/, 2] = "d" 
}.should raise_error(IndexError) + str2.should == str1 + end + end + end + + it "replaces characters with a multibyte character" do + str = "ありgaとう" + str[/ga/] = "が" + str.should == "ありがとう" + end + + it "replaces multibyte characters with characters" do + str = "ありがとう" + str[/が/] = "ga" + str.should == "ありgaとう" + end + + it "replaces multibyte characters with multibyte characters" do + str = "ありがとう" + str[/が/] = "か" + str.should == "ありかとう" + end + + it "encodes the String in an encoding compatible with the replacement" do + str = " ".force_encoding Encoding::US_ASCII + rep = [160].pack('C').force_encoding Encoding::BINARY + str[/ /] = rep + str.encoding.should equal(Encoding::BINARY) + end + + it "raises an Encoding::CompatibilityError if the replacement encoding is incompatible" do + str = "あれ" + rep = "が".encode Encoding::EUC_JP + -> { str[/れ/] = rep }.should raise_error(Encoding::CompatibilityError) + end +end + +describe "String#[]= with a Range index" do + describe "with an empty replacement" do + it "does not replace a character with a zero-index, zero exclude-end range" do + str = "abc" + str[0...0] = "" + str.should == "abc" + end + + it "does not replace a character with a zero exclude-end range" do + str = "abc" + str[1...1] = "" + str.should == "abc" + end + + it "replaces a character with zero-index, zero non-exclude-end range" do + str = "abc" + str[0..0] = "" + str.should == "bc" + end + + it "replaces a character with a zero non-exclude-end range" do + str = "abc" + str[1..1] = "" + str.should == "ac" + end + end + + it "replaces the contents with a shorter String" do + str = "abcde" + str[0..-1] = "hg" + str.should == "hg" + end + + it "replaces the contents with a longer String" do + str = "abc" + str[0...4] = "uvwxyz" + str.should == "uvwxyz" + end + + it "replaces a partial string" do + str = "abcde" + str[1..3] = "B" + str.should == "aBe" + end + + it "raises a RangeError if negative Range begin is out of range" do + -> { "abc"[-4..-2] = "x" 
}.should raise_error(RangeError, "-4..-2 out of range") + end + + it "raises a RangeError if positive Range begin is greater than String size" do + -> { "abc"[4..2] = "x" }.should raise_error(RangeError, "4..2 out of range") + end + + it "uses the Range end as an index rather than a count" do + str = "abcdefg" + str[-5..3] = "xyz" + str.should == "abxyzefg" + end + + it "treats a negative out-of-range Range end with a positive Range begin as a zero count" do + str = "abc" + str[1..-4] = "x" + str.should == "axbc" + end + + it "treats a negative out-of-range Range end with a negative Range begin as a zero count" do + str = "abcd" + str[-1..-4] = "x" + str.should == "abcxd" + end + + it "replaces characters with a multibyte character" do + str = "ありgaとう" + str[2..3] = "が" + str.should == "ありがとう" + end + + it "replaces multibyte characters with characters" do + str = "ありがとう" + str[2...3] = "ga" + str.should == "ありgaとう" + end + + it "replaces multibyte characters by negative indexes" do + str = "ありがとう" + str[-3...-2] = "ga" + str.should == "ありgaとう" + end + + it "replaces multibyte characters with multibyte characters" do + str = "ありがとう" + str[2..2] = "か" + str.should == "ありかとう" + end + + it "deletes a multibyte character" do + str = "ありとう" + str[2..3] = "" + str.should == "あり" + end + + it "inserts a multibyte character" do + str = "ありとう" + str[2...2] = "が" + str.should == "ありがとう" + end + + it "encodes the String in an encoding compatible with the replacement" do + str = " ".force_encoding Encoding::US_ASCII + rep = [160].pack('C').force_encoding Encoding::BINARY + str[0..1] = rep + str.encoding.should equal(Encoding::BINARY) + end + + it "raises an Encoding::CompatibilityError if the replacement encoding is incompatible" do + str = "あれ" + rep = "が".encode Encoding::EUC_JP + -> { str[0..1] = rep }.should raise_error(Encoding::CompatibilityError) + end +end + +describe "String#[]= with Integer index, count" do + it "starts at idx and overwrites count characters before 
inserting the rest of other_str" do + a = "hello" + a[0, 2] = "xx" + a.should == "xxllo" + a = "hello" + a[0, 2] = "jello" + a.should == "jellollo" + end + + it "counts negative idx values from end of the string" do + a = "hello" + a[-1, 0] = "bob" + a.should == "hellbobo" + a = "hello" + a[-5, 0] = "bob" + a.should == "bobhello" + end + + it "overwrites and deletes characters if count is more than the length of other_str" do + a = "hello" + a[0, 4] = "x" + a.should == "xo" + a = "hello" + a[0, 5] = "x" + a.should == "x" + end + + it "deletes characters if other_str is an empty string" do + a = "hello" + a[0, 2] = "" + a.should == "llo" + end + + it "deletes characters up to the maximum length of the existing string" do + a = "hello" + a[0, 6] = "x" + a.should == "x" + a = "hello" + a[0, 100] = "" + a.should == "" + end + + it "appends other_str to the end of the string if idx == the length of the string" do + a = "hello" + a[5, 0] = "bob" + a.should == "hellobob" + end + + it "calls #to_int to convert the index and count objects" do + index = mock("string element set index") + index.should_receive(:to_int).and_return(-4) + + count = mock("string element set count") + count.should_receive(:to_int).and_return(2) + + str = "abcde" + str[index, count] = "xyz" + str.should == "axyzde" + end + + it "raises a TypeError if #to_int for index does not return an Integer" do + index = mock("string element set index") + index.should_receive(:to_int).and_return("1") + + -> { "abc"[index, 2] = "xyz" }.should raise_error(TypeError) + end + + it "raises a TypeError if #to_int for count does not return an Integer" do + count = mock("string element set count") + count.should_receive(:to_int).and_return("1") + + -> { "abc"[1, count] = "xyz" }.should raise_error(TypeError) + end + + it "calls #to_str to convert the replacement object" do + r = mock("string element set replacement") + r.should_receive(:to_str).and_return("xyz") + + str = "abcde" + str[2, 2] = r + str.should == "abxyze" 
+ end + + it "raises a TypeError of #to_str does not return a String" do + r = mock("string element set replacement") + r.should_receive(:to_str).and_return(nil) + + -> { "abc"[1, 1] = r }.should raise_error(TypeError) + end + + it "raises an IndexError if |idx| is greater than the length of the string" do + -> { "hello"[6, 0] = "bob" }.should raise_error(IndexError) + -> { "hello"[-6, 0] = "bob" }.should raise_error(IndexError) + end + + it "raises an IndexError if count < 0" do + -> { "hello"[0, -1] = "bob" }.should raise_error(IndexError) + -> { "hello"[1, -1] = "bob" }.should raise_error(IndexError) + end + + it "raises a TypeError if other_str is a type other than String" do + -> { "hello"[0, 2] = nil }.should raise_error(TypeError) + -> { "hello"[0, 2] = [] }.should raise_error(TypeError) + -> { "hello"[0, 2] = 33 }.should raise_error(TypeError) + end + + it "replaces characters with a multibyte character" do + str = "ありgaとう" + str[2, 2] = "が" + str.should == "ありがとう" + end + + it "replaces multibyte characters with characters" do + str = "ありがとう" + str[2, 1] = "ga" + str.should == "ありgaとう" + end + + it "replaces multibyte characters with multibyte characters" do + str = "ありがとう" + str[2, 1] = "か" + str.should == "ありかとう" + end + + it "deletes a multibyte character" do + str = "ありとう" + str[2, 2] = "" + str.should == "あり" + end + + it "inserts a multibyte character" do + str = "ありとう" + str[2, 0] = "が" + str.should == "ありがとう" + end + + it "raises an IndexError if the character index is out of range of a multibyte String" do + -> { "あれ"[3, 0] = "り" }.should raise_error(IndexError) + end + + it "encodes the String in an encoding compatible with the replacement" do + str = " ".force_encoding Encoding::US_ASCII + rep = [160].pack('C').force_encoding Encoding::BINARY + str[0, 1] = rep + str.encoding.should equal(Encoding::BINARY) + end + + it "raises an Encoding::CompatibilityError if the replacement encoding is incompatible" do + str = "あれ" + rep = "が".encode 
Encoding::EUC_JP + -> { str[0, 1] = rep }.should raise_error(Encoding::CompatibilityError) + end +end diff --git a/spec/ruby/core/string/empty_spec.rb b/spec/ruby/core/string/empty_spec.rb new file mode 100644 index 0000000000..8e53a16afc --- /dev/null +++ b/spec/ruby/core/string/empty_spec.rb @@ -0,0 +1,12 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#empty?" do + it "returns true if the string has a length of zero" do + "hello".should_not.empty? + " ".should_not.empty? + "\x00".should_not.empty? + "".should.empty? + StringSpecs::MyString.new("").should.empty? + end +end diff --git a/spec/ruby/core/string/encode_spec.rb b/spec/ruby/core/string/encode_spec.rb new file mode 100644 index 0000000000..cd449498a3 --- /dev/null +++ b/spec/ruby/core/string/encode_spec.rb @@ -0,0 +1,240 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'shared/encode' + +describe "String#encode" do + before :each do + @external = Encoding.default_external + @internal = Encoding.default_internal + end + + after :each do + Encoding.default_external = @external + Encoding.default_internal = @internal + end + + it_behaves_like :string_encode, :encode + + describe "when passed no options" do + it "returns a copy when Encoding.default_internal is nil" do + Encoding.default_internal = nil + str = "あ" + encoded = str.encode + encoded.should_not equal(str) + encoded.should == str + end + + it "returns a copy for a ASCII-only String when Encoding.default_internal is nil" do + Encoding.default_internal = nil + str = "abc" + encoded = str.encode + encoded.should_not equal(str) + encoded.should == str + end + + it "encodes an ascii substring of a binary string to UTF-8" do + x82 = [0x82].pack('C') + str = "#{x82}foo".dup.force_encoding("binary")[1..-1].encode("utf-8") + str.should == "foo".dup.force_encoding("utf-8") + str.encoding.should equal(Encoding::UTF_8) + end + end + + describe "when passed to encoding" do + 
it "returns a copy when passed the same encoding as the String" do + str = "あ" + encoded = str.encode(Encoding::UTF_8) + encoded.should_not equal(str) + encoded.should == str + end + + it "round trips a String" do + str = "abc def".dup.force_encoding Encoding::US_ASCII + str.encode("utf-32be").encode("ascii").should == "abc def" + end + end + + describe "when passed options" do + it "returns a copy when Encoding.default_internal is nil" do + Encoding.default_internal = nil + str = "あ" + str.encode(invalid: :replace).should_not equal(str) + end + + it "normalizes newlines with cr_newline option" do + "\r\nfoo".encode(cr_newline: true).should == "\r\rfoo" + "\rfoo".encode(cr_newline: true).should == "\rfoo" + "\nfoo".encode(cr_newline: true).should == "\rfoo" + end + + it "normalizes newlines with crlf_newline option" do + "\r\nfoo".encode(crlf_newline: true).should == "\r\r\nfoo" + "\rfoo".encode(crlf_newline: true).should == "\rfoo" + "\nfoo".encode(crlf_newline: true).should == "\r\nfoo" + end + + it "normalizes newlines with universal_newline option" do + "\r\nfoo".encode(universal_newline: true).should == "\nfoo" + "\rfoo".encode(universal_newline: true).should == "\nfoo" + "\nfoo".encode(universal_newline: true).should == "\nfoo" + end + + it "replaces invalid encoding in source with default replacement" do + encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace) + encoded.should == "\u3061\ufffd\ufffd".encode("UTF-16LE") + encoded.encode("UTF-8").should == "ち\ufffd\ufffd" + end + + it "replaces invalid encoding in source with a specified replacement" do + encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "foo") + encoded.should == "\u3061foofoo".encode("UTF-16LE") + encoded.encode("UTF-8").should == "ちfoofoo" + end + + it "replace multiple invalid bytes at the end with a single replacement character" do + "\xE3\x81\x93\xE3\x81".encode("UTF-8", invalid: :replace).should == "\u3053\ufffd" + end + + it "replaces invalid encoding in 
source using a specified replacement even when a fallback is given" do + encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "foo", fallback: -> c { "bar" }) + encoded.should == "\u3061foofoo".encode("UTF-16LE") + encoded.encode("UTF-8").should == "ちfoofoo" + end + + it "replaces undefined encoding in destination with default replacement" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace) + encoded.should == "B?".encode(Encoding::US_ASCII) + encoded.encode("UTF-8").should == "B?" + end + + it "replaces undefined encoding in destination with a specified replacement" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace, replace: "foo") + encoded.should == "Bfoo".encode(Encoding::US_ASCII) + encoded.encode("UTF-8").should == "Bfoo" + end + + it "replaces undefined encoding in destination with a specified replacement even if a fallback is given" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, undef: :replace, replace: "foo", fallback: proc {|x| "bar"}) + encoded.should == "Bfoo".encode(Encoding::US_ASCII) + encoded.encode("UTF-8").should == "Bfoo" + end + + it "replaces undefined encoding in destination using a fallback proc" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: proc {|x| "bar"}) + encoded.should == "Bbar".encode(Encoding::US_ASCII) + encoded.encode("UTF-8").should == "Bbar" + end + + it "replaces invalid encoding in source using replace even when fallback is given as proc" do + encoded = "ち\xE3\x81\xFF".encode("UTF-16LE", invalid: :replace, replace: "foo", fallback: proc {|x| "bar"}) + encoded.should == "\u3061foofoo".encode("UTF-16LE") + encoded.encode("UTF-8").should == "ちfoofoo" + end + end + + describe "when passed to, from" do + it "returns a copy in the destination encoding when both encodings are the same" do + str = "あ".dup.force_encoding("binary") + encoded = str.encode("utf-8", "utf-8") + + encoded.should_not equal(str) + encoded.should == str.force_encoding("utf-8") + 
encoded.encoding.should == Encoding::UTF_8 + end + + it "returns the transcoded string" do + str = "\x00\x00\x00\x1F" + str.encode(Encoding::UTF_8, Encoding::UTF_16BE).should == "\u0000\u001f" + end + end + + describe "when passed to, options" do + it "returns a copy when the destination encoding is the same as the String encoding" do + str = "あ" + encoded = str.encode(Encoding::UTF_8, undef: :replace) + encoded.should_not equal(str) + encoded.should == str + end + end + + describe "when passed to, from, options" do + it "returns a copy when both encodings are the same" do + str = "あ" + encoded = str.encode("utf-8", "utf-8", invalid: :replace) + encoded.should_not equal(str) + encoded.should == str + end + + it "returns a copy in the destination encoding when both encodings are the same" do + str = "あ".dup.force_encoding("binary") + encoded = str.encode("utf-8", "utf-8", invalid: :replace) + + encoded.should_not equal(str) + encoded.should == str.force_encoding("utf-8") + encoded.encoding.should == Encoding::UTF_8 + end + end +end + +describe "String#encode!" do + before :each do + @external = Encoding.default_external + @internal = Encoding.default_internal + end + + after :each do + Encoding.default_external = @external + Encoding.default_internal = @internal + end + + it_behaves_like :string_encode, :encode! 
+ + it "raises a FrozenError when called on a frozen String" do + -> { "foo".freeze.encode!("euc-jp") }.should raise_error(FrozenError) + end + + # http://redmine.ruby-lang.org/issues/show/1836 + it "raises a FrozenError when called on a frozen String when it's a no-op" do + -> { "foo".freeze.encode!("utf-8") }.should raise_error(FrozenError) + end + + describe "when passed no options" do + it "returns self when Encoding.default_internal is nil" do + Encoding.default_internal = nil + str = +"あ" + str.encode!.should equal(str) + end + + it "returns self for a ASCII-only String when Encoding.default_internal is nil" do + Encoding.default_internal = nil + str = +"abc" + str.encode!.should equal(str) + end + end + + describe "when passed options" do + it "returns self for ASCII-only String when Encoding.default_internal is nil" do + Encoding.default_internal = nil + str = +"abc" + str.encode!(invalid: :replace).should equal(str) + end + end + + describe "when passed to encoding" do + it "returns self" do + str = +"abc" + result = str.encode!(Encoding::BINARY) + result.encoding.should equal(Encoding::BINARY) + result.should equal(str) + end + end + + describe "when passed to, from" do + it "returns self" do + str = +"ああ" + result = str.encode!("euc-jp", "utf-8") + result.encoding.should equal(Encoding::EUC_JP) + result.should equal(str) + end + end +end diff --git a/spec/ruby/core/string/encoding_spec.rb b/spec/ruby/core/string/encoding_spec.rb new file mode 100644 index 0000000000..f6e8fd3470 --- /dev/null +++ b/spec/ruby/core/string/encoding_spec.rb @@ -0,0 +1,184 @@ +# -*- encoding: us-ascii -*- +require_relative '../../spec_helper' +require_relative 'fixtures/iso-8859-9-encoding' + +describe "String#encoding" do + it "returns an Encoding object" do + String.new.encoding.should be_an_instance_of(Encoding) + end + + it "is equal to the source encoding by default" do + s = StringSpecs::ISO88599Encoding.new + s.cedilla.encoding.should == s.source_encoding + 
s.cedilla.encode("utf-8").should == 350.chr(Encoding::UTF_8) # S-cedilla + end + + it "returns the given encoding if #force_encoding has been called" do + "a".dup.force_encoding(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + end + + it "returns the given encoding if #encode! has been called" do + "a".dup.encode!(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + end +end + +describe "String#encoding for US-ASCII Strings" do + it "returns US-ASCII if self is US-ASCII" do + "a".encoding.should == Encoding::US_ASCII + end + + it "returns US-ASCII if self is US-ASCII only, despite the default internal encoding being different" do + default_internal = Encoding.default_internal + Encoding.default_internal = Encoding::UTF_8 + "a".encoding.should == Encoding::US_ASCII + Encoding.default_internal = default_internal + end + + it "returns US-ASCII if self is US-ASCII only, despite the default external encoding being different" do + default_external = Encoding.default_external + Encoding.default_external = Encoding::UTF_8 + "a".encoding.should == Encoding::US_ASCII + Encoding.default_external = default_external + end + + it "returns US-ASCII if self is US-ASCII only, despite the default internal and external encodings being different" do + default_internal = Encoding.default_internal + default_external = Encoding.default_external + Encoding.default_internal = Encoding::UTF_8 + Encoding.default_external = Encoding::UTF_8 + "a".encoding.should == Encoding::US_ASCII + Encoding.default_external = default_external + Encoding.default_internal = default_internal + end + + it "returns US-ASCII if self is US-ASCII only, despite the default encodings being different" do + default_internal = Encoding.default_internal + default_external = Encoding.default_external + Encoding.default_internal = Encoding::UTF_8 + Encoding.default_external = Encoding::UTF_8 + "a".encoding.should == Encoding::US_ASCII + Encoding.default_external = default_external + 
Encoding.default_internal = default_internal + end + +end + +describe "String#encoding for Strings with \\u escapes" do + it "returns UTF-8" do + "\u{4040}".encoding.should == Encoding::UTF_8 + end + + it "returns US-ASCII if self is US-ASCII only" do + s = "\u{40}" + s.ascii_only?.should be_true + s.encoding.should == Encoding::US_ASCII + end + + it "returns UTF-8 if self isn't US-ASCII only" do + s = "\u{4076}\u{619}" + s.ascii_only?.should be_false + s.encoding.should == Encoding::UTF_8 + end + + it "is not affected by the default internal encoding" do + default_internal = Encoding.default_internal + Encoding.default_internal = Encoding::ISO_8859_15 + "\u{5050}".encoding.should == Encoding::UTF_8 + "\u{50}".encoding.should == Encoding::US_ASCII + Encoding.default_internal = default_internal + end + + it "is not affected by the default external encoding" do + default_external = Encoding.default_external + Encoding.default_external = Encoding::SHIFT_JIS + "\u{50}".encoding.should == Encoding::US_ASCII + "\u{5050}".encoding.should == Encoding::UTF_8 + Encoding.default_external = default_external + end + + it "is not affected by both the default internal and external encoding being set at the same time" do + default_internal = Encoding.default_internal + default_external = Encoding.default_external + Encoding.default_internal = Encoding::EUC_JP + Encoding.default_external = Encoding::SHIFT_JIS + "\u{50}".encoding.should == Encoding::US_ASCII + "\u{507}".encoding.should == Encoding::UTF_8 + Encoding.default_external = default_external + Encoding.default_internal = default_internal + end + + it "returns the given encoding if #force_encoding has been called" do + "\u{20}".dup.force_encoding(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + "\u{2020}".dup.force_encoding(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + end + + it "returns the given encoding if #encode!has been called" do + "\u{20}".dup.encode!(Encoding::SHIFT_JIS).encoding.should 
== Encoding::SHIFT_JIS + "\u{2020}".dup.encode!(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + end +end + +describe "String#encoding for Strings with \\x escapes" do + + it "returns US-ASCII if self is US-ASCII only" do + s = "\x61" + s.ascii_only?.should be_true + s.encoding.should == Encoding::US_ASCII + end + + it "returns BINARY when an escape creates a byte with the 8th bit set if the source encoding is US-ASCII" do + __ENCODING__.should == Encoding::US_ASCII + str = " " + str.encoding.should == Encoding::US_ASCII + str += [0xDF].pack('C') + str.ascii_only?.should be_false + str.encoding.should == Encoding::BINARY + end + + # TODO: Deal with case when the byte in question isn't valid in the source + # encoding? + it "returns the source encoding when an escape creates a byte with the 8th bit set if the source encoding isn't US-ASCII" do + fixture = StringSpecs::ISO88599Encoding.new + fixture.source_encoding.should == Encoding::ISO8859_9 + fixture.x_escape.ascii_only?.should be_false + fixture.x_escape.encoding.should == Encoding::ISO8859_9 + end + + it "is not affected by the default internal encoding" do + default_internal = Encoding.default_internal + Encoding.default_internal = Encoding::ISO_8859_15 + "\x50".encoding.should == Encoding::US_ASCII + "\x50".encoding.should == Encoding::US_ASCII + Encoding.default_internal = default_internal + end + + it "is not affected by the default external encoding" do + default_external = Encoding.default_external + Encoding.default_external = Encoding::SHIFT_JIS + "\x50".encoding.should == Encoding::US_ASCII + [0xD4].pack('C').encoding.should == Encoding::BINARY + Encoding.default_external = default_external + end + + it "is not affected by both the default internal and external encoding being set at the same time" do + default_internal = Encoding.default_internal + default_external = Encoding.default_external + Encoding.default_internal = Encoding::EUC_JP + Encoding.default_external = Encoding::SHIFT_JIS + 
x50 = "\x50" + x50.encoding.should == Encoding::US_ASCII + [0xD4].pack('C').encoding.should == Encoding::BINARY + Encoding.default_external = default_external + Encoding.default_internal = default_internal + end + + it "returns the given encoding if #force_encoding has been called" do + "\x50".dup.force_encoding(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + [212].pack('C').force_encoding(Encoding::ISO_8859_9).encoding.should == Encoding::ISO_8859_9 + end + + it "returns the given encoding if #encode!has been called" do + "\x50".dup.encode!(Encoding::SHIFT_JIS).encoding.should == Encoding::SHIFT_JIS + "x\00".dup.encode!(Encoding::UTF_8).encoding.should == Encoding::UTF_8 + end +end diff --git a/spec/ruby/core/string/end_with_spec.rb b/spec/ruby/core/string/end_with_spec.rb new file mode 100644 index 0000000000..ac4fff72ad --- /dev/null +++ b/spec/ruby/core/string/end_with_spec.rb @@ -0,0 +1,8 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative '../../shared/string/end_with' + +describe "String#end_with?" do + it_behaves_like :end_with, :to_s +end diff --git a/spec/ruby/core/string/eql_spec.rb b/spec/ruby/core/string/eql_spec.rb new file mode 100644 index 0000000000..397974d9fb --- /dev/null +++ b/spec/ruby/core/string/eql_spec.rb @@ -0,0 +1,21 @@ +require_relative '../../spec_helper' +require_relative 'shared/eql' + +describe "String#eql?" do + it_behaves_like :string_eql_value, :eql? 
+ + describe "when given a non-String" do + it "returns false" do + 'hello'.should_not eql(5) + not_supported_on :opal do + 'hello'.should_not eql(:hello) + end + 'hello'.should_not eql(mock('x')) + end + + it "does not try to call #to_str on the given argument" do + (obj = mock('x')).should_not_receive(:to_str) + 'hello'.should_not eql(obj) + end + end +end diff --git a/spec/ruby/core/string/equal_value_spec.rb b/spec/ruby/core/string/equal_value_spec.rb new file mode 100644 index 0000000000..b9c9c372f8 --- /dev/null +++ b/spec/ruby/core/string/equal_value_spec.rb @@ -0,0 +1,8 @@ +require_relative '../../spec_helper' +require_relative 'shared/eql' +require_relative 'shared/equal_value' + +describe "String#==" do + it_behaves_like :string_eql_value, :== + it_behaves_like :string_equal_value, :== +end diff --git a/spec/ruby/core/string/fixtures/classes.rb b/spec/ruby/core/string/fixtures/classes.rb new file mode 100644 index 0000000000..26fcd51b5d --- /dev/null +++ b/spec/ruby/core/string/fixtures/classes.rb @@ -0,0 +1,60 @@ +class Object + # This helper is defined here rather than in MSpec because + # it is only used in #unpack specs. 
+ def unpack_format(count=nil, repeat=nil) + format = "#{instance_variable_get(:@method)}#{count}" + format *= repeat if repeat + format.dup # because it may then become tainted + end +end + +module StringSpecs + class MyString < String; end + class MyArray < Array; end + class MyRange < Range; end + + class SubString < String + attr_reader :special + + def initialize(str=nil) + @special = str + end + end + + class InitializeString < String + attr_reader :ivar + + def initialize(other) + super + @ivar = 1 + end + + def initialize_copy(other) + ScratchPad.record object_id + end + end + + module StringModule + def repr + 1 + end + end + + class StringWithRaisingConstructor < String + def initialize(str) + raise ArgumentError.new('constructor was called') unless str == 'silly:string' + self.replace(str) + end + end + + class SpecialVarProcessor + def process(match) + if $~ != nil + str = $~[0] + else + str = "unset" + end + "<#{str}>" + end + end +end diff --git a/spec/ruby/core/string/fixtures/freeze_magic_comment.rb b/spec/ruby/core/string/fixtures/freeze_magic_comment.rb new file mode 100644 index 0000000000..2b87a16328 --- /dev/null +++ b/spec/ruby/core/string/fixtures/freeze_magic_comment.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +print (+ 'frozen string').frozen? ? 
'immutable' : 'mutable' diff --git a/spec/ruby/core/string/fixtures/iso-8859-9-encoding.rb b/spec/ruby/core/string/fixtures/iso-8859-9-encoding.rb new file mode 100644 index 0000000000..cfa91dedc3 --- /dev/null +++ b/spec/ruby/core/string/fixtures/iso-8859-9-encoding.rb @@ -0,0 +1,9 @@ +# -*- encoding: iso-8859-9 -*- +module StringSpecs + class ISO88599Encoding + def source_encoding; __ENCODING__; end + def x_escape; [0xDF].pack('C').force_encoding("iso-8859-9"); end + def ascii_only; "glark"; end + def cedilla; ""; end # S-cedilla + end +end diff --git a/spec/ruby/core/string/fixtures/to_c.rb b/spec/ruby/core/string/fixtures/to_c.rb new file mode 100644 index 0000000000..7776933263 --- /dev/null +++ b/spec/ruby/core/string/fixtures/to_c.rb @@ -0,0 +1,5 @@ +module StringSpecs + def self.to_c_method(string) + string.to_c + end +end diff --git a/spec/ruby/core/string/force_encoding_spec.rb b/spec/ruby/core/string/force_encoding_spec.rb new file mode 100644 index 0000000000..2259dcf3cf --- /dev/null +++ b/spec/ruby/core/string/force_encoding_spec.rb @@ -0,0 +1,72 @@ +# frozen_string_literal: false +require_relative '../../spec_helper' + +describe "String#force_encoding" do + it "accepts a String as the name of an Encoding" do + "abc".force_encoding('shift_jis').encoding.should == Encoding::Shift_JIS + end + + describe "with a special encoding name" do + before :each do + @original_encoding = Encoding.default_internal + end + + after :each do + Encoding.default_internal = @original_encoding + end + + it "accepts valid special encoding names" do + Encoding.default_internal = "US-ASCII" + "abc".force_encoding("internal").encoding.should == Encoding::US_ASCII + end + + it "defaults to BINARY if special encoding name is not set" do + Encoding.default_internal = nil + "abc".force_encoding("internal").encoding.should == Encoding::BINARY + end + end + + it "accepts an Encoding instance" do + "abc".force_encoding(Encoding::SHIFT_JIS).encoding.should == Encoding::Shift_JIS + 
end + + it "calls #to_str to convert an object to an encoding name" do + obj = mock("force_encoding") + obj.should_receive(:to_str).and_return("utf-8") + + "abc".force_encoding(obj).encoding.should == Encoding::UTF_8 + end + + it "raises a TypeError if #to_str does not return a String" do + obj = mock("force_encoding") + obj.should_receive(:to_str).and_return(1) + + -> { "abc".force_encoding(obj) }.should raise_error(TypeError) + end + + it "raises a TypeError if passed nil" do + -> { "abc".force_encoding(nil) }.should raise_error(TypeError) + end + + it "returns self" do + str = "abc" + str.force_encoding('utf-8').should equal(str) + end + + it "sets the encoding even if the String contents are invalid in that encoding" do + str = "\u{9765}" + str.force_encoding('euc-jp') + str.encoding.should == Encoding::EUC_JP + str.valid_encoding?.should be_false + end + + it "does not transcode self" do + str = "é" + str.dup.force_encoding('utf-16le').should_not == str.encode('utf-16le') + end + + it "raises a FrozenError if self is frozen" do + str = "abcd".freeze + -> { str.force_encoding(str.encoding) }.should raise_error(FrozenError) + end +end diff --git a/spec/ruby/core/string/freeze_spec.rb b/spec/ruby/core/string/freeze_spec.rb new file mode 100644 index 0000000000..2e8e70386d --- /dev/null +++ b/spec/ruby/core/string/freeze_spec.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: false +require_relative '../../spec_helper' + +describe "String#freeze" do + + it "produces the same object whenever called on an instance of a literal in the source" do + "abc".freeze.should equal "abc".freeze + end + + it "doesn't produce the same object for different instances of literals in the source" do + "abc".should_not equal "abc" + end + + it "being a special form doesn't change the value of defined?" 
do + defined?("abc".freeze).should == "method" + end + +end diff --git a/spec/ruby/core/string/getbyte_spec.rb b/spec/ruby/core/string/getbyte_spec.rb new file mode 100644 index 0000000000..27b7d826ea --- /dev/null +++ b/spec/ruby/core/string/getbyte_spec.rb @@ -0,0 +1,69 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' + +describe "String#getbyte" do + it "returns an Integer if given a valid index" do + "a".getbyte(0).should be_kind_of(Integer) + end + + it "starts indexing at 0" do + "b".getbyte(0).should == 98 + + # copy-on-write case + _str1, str2 = "fooXbar".split("X") + str2.getbyte(0).should == 98 + end + + it "counts from the end of the String if given a negative argument" do + "glark".getbyte(-1).should == "glark".getbyte(4) + + # copy-on-write case + _str1, str2 = "fooXbar".split("X") + str2.getbyte(-1).should == 114 + end + + it "returns an Integer between 0 and 255" do + "\x00".getbyte(0).should == 0 + [0xFF].pack('C').getbyte(0).should == 255 + 256.chr('utf-8').getbyte(0).should == 196 + 256.chr('utf-8').getbyte(1).should == 128 + end + + it "regards a multi-byte character as having multiple bytes" do + chr = "\u{998}" + chr.bytesize.should == 3 + chr.getbyte(0).should == 224 + chr.getbyte(1).should == 166 + chr.getbyte(2).should == 152 + end + + it "mirrors the output of #bytes" do + xDE = [0xDE].pack('C').force_encoding('utf-8') + str = "UTF-8 (\u{9865}} characters and hex escapes (#{xDE})" + str.bytes.to_a.each_with_index do |byte, index| + str.getbyte(index).should == byte + end + end + + it "interprets bytes relative to the String's encoding" do + str = "\u{333}" + str.encode('utf-8').getbyte(0).should_not == str.encode('utf-16le').getbyte(0) + end + + it "returns nil for out-of-bound indexes" do + "g".getbyte(1).should be_nil + end + + it "regards the empty String as containing no bytes" do + "".getbyte(0).should be_nil + end + + it "raises an ArgumentError unless given one argument" do + -> { "glark".getbyte }.should 
raise_error(ArgumentError) + -> { "food".getbyte(0,0) }.should raise_error(ArgumentError) + end + + it "raises a TypeError unless its argument can be coerced into an Integer" do + -> { "a".getbyte('a') }.should raise_error(TypeError) + end +end diff --git a/spec/ruby/core/string/grapheme_clusters_spec.rb b/spec/ruby/core/string/grapheme_clusters_spec.rb new file mode 100644 index 0000000000..380a245083 --- /dev/null +++ b/spec/ruby/core/string/grapheme_clusters_spec.rb @@ -0,0 +1,14 @@ +require_relative "../../spec_helper" +require_relative 'shared/chars' +require_relative 'shared/grapheme_clusters' + +describe "String#grapheme_clusters" do + it_behaves_like :string_chars, :grapheme_clusters + it_behaves_like :string_grapheme_clusters, :grapheme_clusters + + it "returns an array when no block given" do + string = "ab\u{1f3f3}\u{fe0f}\u{200d}\u{1f308}\u{1F43E}" + string.grapheme_clusters.should == ['a', 'b', "\u{1f3f3}\u{fe0f}\u{200d}\u{1f308}", "\u{1F43E}"] + + end +end diff --git a/spec/ruby/core/string/gsub_spec.rb b/spec/ruby/core/string/gsub_spec.rb new file mode 100644 index 0000000000..0d9f32eca2 --- /dev/null +++ b/spec/ruby/core/string/gsub_spec.rb @@ -0,0 +1,615 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe :string_gsub_named_capture, shared: true do + it "replaces \\k named backreferences with the regexp's corresponding capture" do + str = "hello" + + str.gsub(/(?<foo>[aeiou])/, '<\k<foo>>').should == "h<e>ll<o>" + str.gsub(/(?<foo>.)/, '\k<foo>\k<foo>').should == "hheelllloo" + end +end + +describe "String#gsub with pattern and replacement" do + it "inserts the replacement around every character when the pattern collapses" do + "hello".gsub(//, ".").should == ".h.e.l.l.o." + end + + it "respects unicode when the pattern collapses" do + str = "こにちわ" + reg = %r!! + + str.gsub(reg, ".").should == ".こ.に.ち.わ." 
+ end + + it "doesn't freak out when replacing ^" do + "Text\n".gsub(/^/, ' ').should == " Text\n" + "Text\nFoo".gsub(/^/, ' ').should == " Text\n Foo" + end + + it "returns a copy of self with all occurrences of pattern replaced with replacement" do + "hello".gsub(/[aeiou]/, '*').should == "h*ll*" + + str = "hello homely world. hah!" + str.gsub(/\Ah\S+\s*/, "huh? ").should == "huh? homely world. hah!" + + str = "¿por qué?" + str.gsub(/([a-z\d]*)/, "*").should == "*¿** **é*?*" + end + + it "ignores a block if supplied" do + "food".gsub(/f/, "g") { "w" }.should == "good" + end + + it "supports \\G which matches at the beginning of the remaining (non-matched) string" do + str = "hello homely world. hah!" + str.gsub(/\Gh\S+\s*/, "huh? ").should == "huh? huh? world. hah!" + end + + it "supports /i for ignoring case" do + str = "Hello. How happy are you?" + str.gsub(/h/i, "j").should == "jello. jow jappy are you?" + str.gsub(/H/i, "j").should == "jello. jow jappy are you?" + end + + it "doesn't interpret regexp metacharacters if pattern is a string" do + "12345".gsub('\d', 'a').should == "12345" + '\d'.gsub('\d', 'a').should == "a" + end + + it "replaces \\1 sequences with the regexp's corresponding capture" do + str = "hello" + + str.gsub(/([aeiou])/, '<\1>').should == "h<e>ll<o>" + str.gsub(/(.)/, '\1\1').should == "hheelllloo" + + str.gsub(/.(.?)/, '<\0>(\1)').should == "<he>(e)<ll>(l)<o>()" + + str.gsub(/.(.)+/, '\1').should == "o" + + str = "ABCDEFGHIJKLabcdefghijkl" + re = /#{"(.)" * 12}/ + str.gsub(re, '\1').should == "Aa" + str.gsub(re, '\9').should == "Ii" + # Only the first 9 captures can be accessed in MRI + str.gsub(re, '\10').should == "A0a0" + end + + it "treats \\1 sequences without corresponding captures as empty strings" do + str = "hello!" + + str.gsub("", '<\1>').should == "<>h<>e<>l<>l<>o<>!<>" + str.gsub("h", '<\1>').should == "<>ello!" 
+ + str.gsub(//, '<\1>').should == "<>h<>e<>l<>l<>o<>!<>" + str.gsub(/./, '\1\2\3').should == "" + str.gsub(/.(.{20})?/, '\1').should == "" + end + + it "replaces \\& and \\0 with the complete match" do + str = "hello!" + + str.gsub("", '<\0>').should == "<>h<>e<>l<>l<>o<>!<>" + str.gsub("", '<\&>').should == "<>h<>e<>l<>l<>o<>!<>" + str.gsub("he", '<\0>').should == "<he>llo!" + str.gsub("he", '<\&>').should == "<he>llo!" + str.gsub("l", '<\0>').should == "he<l><l>o!" + str.gsub("l", '<\&>').should == "he<l><l>o!" + + str.gsub(//, '<\0>').should == "<>h<>e<>l<>l<>o<>!<>" + str.gsub(//, '<\&>').should == "<>h<>e<>l<>l<>o<>!<>" + str.gsub(/../, '<\0>').should == "<he><ll><o!>" + str.gsub(/../, '<\&>').should == "<he><ll><o!>" + str.gsub(/(.)./, '<\0>').should == "<he><ll><o!>" + end + + it "replaces \\` with everything before the current match" do + str = "hello!" + + str.gsub("", '<\`>').should == "<>h<h>e<he>l<hel>l<hell>o<hello>!<hello!>" + str.gsub("h", '<\`>').should == "<>ello!" + str.gsub("l", '<\`>').should == "he<he><hel>o!" + str.gsub("!", '<\`>').should == "hello<hello>" + + str.gsub(//, '<\`>').should == "<>h<h>e<he>l<hel>l<hell>o<hello>!<hello!>" + str.gsub(/../, '<\`>').should == "<><he><hell>" + end + + it "replaces \\' with everything after the current match" do + str = "hello!" + + str.gsub("", '<\\\'>').should == "<hello!>h<ello!>e<llo!>l<lo!>l<o!>o<!>!<>" + str.gsub("h", '<\\\'>').should == "<ello!>ello!" + str.gsub("ll", '<\\\'>').should == "he<o!>o!" + str.gsub("!", '<\\\'>').should == "hello<>" + + str.gsub(//, '<\\\'>').should == "<hello!>h<ello!>e<llo!>l<lo!>l<o!>o<!>!<>" + str.gsub(/../, '<\\\'>').should == "<llo!><o!><>" + end + + it "replaces \\+ with the last paren that actually matched" do + str = "hello!" + + str.gsub(/(.)(.)/, '\+').should == "el!" + str.gsub(/(.)(.)+/, '\+').should == "!" 
+ str.gsub(/(.)()/, '\+').should == "" + str.gsub(/(.)(.{20})?/, '<\+>').should == "<h><e><l><l><o><!>" + + str = "ABCDEFGHIJKLabcdefghijkl" + re = /#{"(.)" * 12}/ + str.gsub(re, '\+').should == "Ll" + end + + it "treats \\+ as an empty string if there was no captures" do + "hello!".gsub(/./, '\+').should == "" + end + + it "maps \\\\ in replacement to \\" do + "hello".gsub(/./, '\\\\').should == '\\' * 5 + end + + it "leaves unknown \\x escapes in replacement untouched" do + "hello".gsub(/./, '\\x').should == '\\x' * 5 + "hello".gsub(/./, '\\y').should == '\\y' * 5 + end + + it "leaves \\ at the end of replacement untouched" do + "hello".gsub(/./, 'hah\\').should == 'hah\\' * 5 + end + + it_behaves_like :string_gsub_named_capture, :gsub + + it "handles pattern collapse" do + str = "こにちわ" + reg = %r!! + str.gsub(reg, ".").should == ".こ.に.ち.わ." + end + + it "tries to convert pattern to a string using to_str" do + pattern = mock('.') + def pattern.to_str() "." end + + "hello.".gsub(pattern, "!").should == "hello!" 
+ end + + it "raises a TypeError when pattern can't be converted to a string" do + -> { "hello".gsub([], "x") }.should raise_error(TypeError) + -> { "hello".gsub(Object.new, "x") }.should raise_error(TypeError) + -> { "hello".gsub(nil, "x") }.should raise_error(TypeError) + end + + it "tries to convert replacement to a string using to_str" do + replacement = mock('hello_replacement') + def replacement.to_str() "hello_replacement" end + + "hello".gsub(/hello/, replacement).should == "hello_replacement" + end + + it "raises a TypeError when replacement can't be converted to a string" do + -> { "hello".gsub(/[aeiou]/, []) }.should raise_error(TypeError) + -> { "hello".gsub(/[aeiou]/, Object.new) }.should raise_error(TypeError) + -> { "hello".gsub(/[aeiou]/, nil) }.should raise_error(TypeError) + end + + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("").gsub(//, "").should be_an_instance_of(String) + StringSpecs::MyString.new("").gsub(/foo/, "").should be_an_instance_of(String) + StringSpecs::MyString.new("foo").gsub(/foo/, "").should be_an_instance_of(String) + StringSpecs::MyString.new("foo").gsub("foo", "").should be_an_instance_of(String) + end + + it "sets $~ to MatchData of last match and nil when there's none" do + 'hello.'.gsub('hello', 'x') + $~[0].should == 'hello' + + 'hello.'.gsub('not', 'x') + $~.should == nil + + 'hello.'.gsub(/.(.)/, 'x') + $~[0].should == 'o.' 
+ + 'hello.'.gsub(/not/, 'x') + $~.should == nil + end + + it "handles a pattern in a superset encoding" do + result = 'abc'.force_encoding(Encoding::US_ASCII).gsub('é', 'è') + result.should == 'abc' + result.encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + result = 'été'.gsub('t'.force_encoding(Encoding::US_ASCII), 'u') + result.should == 'éué' + result.encoding.should == Encoding::UTF_8 + end +end + +describe "String#gsub with pattern and Hash" do + it "returns a copy of self with all occurrences of pattern replaced with the value of the corresponding hash key" do + "hello".gsub(/./, 'l' => 'L').should == "LL" + "hello!".gsub(/(.)(.)/, 'he' => 'she ', 'll' => 'said').should == 'she said' + "hello".gsub('l', 'l' => 'el').should == 'heelelo' + end + + it "ignores keys that don't correspond to matches" do + "hello".gsub(/./, 'z' => 'L', 'h' => 'b', 'o' => 'ow').should == "bow" + end + + it "returns an empty string if the pattern matches but the hash specifies no replacements" do + "hello".gsub(/./, 'z' => 'L').should == "" + end + + it "ignores non-String keys" do + "tattoo".gsub(/(tt)/, 'tt' => 'b', tt: 'z').should == "taboo" + end + + it "uses a key's value as many times as needed" do + "food".gsub(/o/, 'o' => '0').should == "f00d" + end + + it "uses the hash's default value for missing keys" do + hsh = {} + hsh.default='?' + hsh['o'] = '0' + "food".gsub(/./, hsh).should == "?00?" + end + + it "coerces the hash values with #to_s" do + hsh = {} + hsh.default=[] + hsh['o'] = 0 + obj = mock('!') + obj.should_receive(:to_s).and_return('!') + hsh['!'] = obj + "food!".gsub(/./, hsh).should == "[]00[]!" 
+ end + + it "uses the hash's value set from default_proc for missing keys" do + hsh = {} + hsh.default_proc = -> k, v { 'lamb' } + "food!".gsub(/./, hsh).should == "lamblamblamblamblamb" + end + + it "sets $~ to MatchData of last match and nil when there's none for access from outside" do + 'hello.'.gsub('l', 'l' => 'L') + $~.begin(0).should == 3 + $~[0].should == 'l' + + 'hello.'.gsub('not', 'ot' => 'to') + $~.should == nil + + 'hello.'.gsub(/.(.)/, 'o' => ' hole') + $~[0].should == 'o.' + + 'hello.'.gsub(/not/, 'z' => 'glark') + $~.should == nil + end + + it "doesn't interpolate special sequences like \\1 for the block's return value" do + repl = '\& \0 \1 \` \\\' \+ \\\\ foo' + "hello".gsub(/(.+)/, 'hello' => repl ).should == repl + end +end + +describe "String#gsub! with pattern and Hash" do + + it "returns self with all occurrences of pattern replaced with the value of the corresponding hash key" do + "hello".gsub!(/./, 'l' => 'L').should == "LL" + "hello!".gsub!(/(.)(.)/, 'he' => 'she ', 'll' => 'said').should == 'she said' + "hello".gsub!('l', 'l' => 'el').should == 'heelelo' + end + + it "ignores keys that don't correspond to matches" do + "hello".gsub!(/./, 'z' => 'L', 'h' => 'b', 'o' => 'ow').should == "bow" + end + + it "replaces self with an empty string if the pattern matches but the hash specifies no replacements" do + "hello".gsub!(/./, 'z' => 'L').should == "" + end + + it "ignores non-String keys" do + "hello".gsub!(/(ll)/, 'll' => 'r', ll: 'z').should == "hero" + end + + it "uses a key's value as many times as needed" do + "food".gsub!(/o/, 'o' => '0').should == "f00d" + end + + it "uses the hash's default value for missing keys" do + hsh = {} + hsh.default='?' + hsh['o'] = '0' + "food".gsub!(/./, hsh).should == "?00?" + end + + it "coerces the hash values with #to_s" do + hsh = {} + hsh.default=[] + hsh['o'] = 0 + obj = mock('!') + obj.should_receive(:to_s).and_return('!') + hsh['!'] = obj + "food!".gsub!(/./, hsh).should == "[]00[]!" 
+ end + + it "uses the hash's value set from default_proc for missing keys" do + hsh = {} + hsh.default_proc = -> k, v { 'lamb' } + "food!".gsub!(/./, hsh).should == "lamblamblamblamblamb" + end + + it "sets $~ to MatchData of last match and nil when there's none for access from outside" do + 'hello.'.gsub!('l', 'l' => 'L') + $~.begin(0).should == 3 + $~[0].should == 'l' + + 'hello.'.gsub!('not', 'ot' => 'to') + $~.should == nil + + 'hello.'.gsub!(/.(.)/, 'o' => ' hole') + $~[0].should == 'o.' + + 'hello.'.gsub!(/not/, 'z' => 'glark') + $~.should == nil + end + + it "doesn't interpolate special sequences like \\1 for the block's return value" do + repl = '\& \0 \1 \` \\\' \+ \\\\ foo' + "hello".gsub!(/(.+)/, 'hello' => repl ).should == repl + end +end + +describe "String#gsub with pattern and block" do + it "returns a copy of self with all occurrences of pattern replaced with the block's return value" do + "hello".gsub(/./) { |s| s.succ + ' ' }.should == "i f m m p " + "hello!".gsub(/(.)(.)/) { |*a| a.inspect }.should == '["he"]["ll"]["o!"]' + "hello".gsub('l') { 'x'}.should == 'hexxo' + end + + it "sets $~ for access from the block" do + str = "hello" + str.gsub(/([aeiou])/) { "<#{$~[1]}>" }.should == "h<e>ll<o>" + str.gsub(/([aeiou])/) { "<#{$1}>" }.should == "h<e>ll<o>" + str.gsub("l") { "<#{$~[0]}>" }.should == "he<l><l>o" + + offsets = [] + + str.gsub(/([aeiou])/) do + md = $~ + md.string.should == str + offsets << md.offset(0) + str + end.should == "hhellollhello" + + offsets.should == [[1, 2], [4, 5]] + end + + it "does not set $~ for procs created from methods" do + str = "hello" + str.gsub("l", &StringSpecs::SpecialVarProcessor.new.method(:process)).should == "he<unset><unset>o" + end + + it "restores $~ after leaving the block" do + [/./, "l"].each do |pattern| + old_md = nil + "hello".gsub(pattern) do + old_md = $~ + "ok".match(/./) + "x" + end + + $~[0].should == old_md[0] + $~.string.should == "hello" + end + end + + it "sets $~ to MatchData of last 
match and nil when there's none for access from outside" do + 'hello.'.gsub('l') { 'x' } + $~.begin(0).should == 3 + $~[0].should == 'l' + + 'hello.'.gsub('not') { 'x' } + $~.should == nil + + 'hello.'.gsub(/.(.)/) { 'x' } + $~[0].should == 'o.' + + 'hello.'.gsub(/not/) { 'x' } + $~.should == nil + end + + it "doesn't interpolate special sequences like \\1 for the block's return value" do + repl = '\& \0 \1 \` \\\' \+ \\\\ foo' + "hello".gsub(/(.+)/) { repl }.should == repl + end + + it "converts the block's return value to a string using to_s" do + replacement = mock('hello_replacement') + def replacement.to_s() "hello_replacement" end + + "hello".gsub(/hello/) { replacement }.should == "hello_replacement" + + obj = mock('ok') + def obj.to_s() "ok" end + + "hello".gsub(/.+/) { obj }.should == "ok" + end + + it "uses the compatible encoding if they are compatible" do + s = "hello" + s2 = "#{195.chr}#{192.chr}#{195.chr}" + + s.gsub(/l/) { |bar| 195.chr }.encoding.should == Encoding::BINARY + s2.gsub("#{192.chr}") { |bar| "hello" }.encoding.should == Encoding::BINARY + end + + it "raises an Encoding::CompatibilityError if the encodings are not compatible" do + s = "hllëllo" + s2 = "hellö" + + -> { s.gsub(/l/) { |bar| "Русский".force_encoding("iso-8859-5") } }.should raise_error(Encoding::CompatibilityError) + -> { s2.gsub(/l/) { |bar| "Русский".force_encoding("iso-8859-5") } }.should raise_error(Encoding::CompatibilityError) + end + + it "replaces the incompatible part properly even if the encodings are not compatible" do + s = "hllëllo" + + s.gsub(/ë/) { |bar| "Русский".force_encoding("iso-8859-5") }.encoding.should == Encoding::ISO_8859_5 + end + + not_supported_on :opal do + it "raises an ArgumentError if encoding is not valid" do + x92 = [0x92].pack('C').force_encoding('utf-8') + -> { "a#{x92}b".gsub(/[^\x00-\x7f]/u, '') }.should raise_error(ArgumentError) + end + end +end + +describe "String#gsub with pattern and without replacement and block" do + it "returns 
an enumerator" do + enum = "abca".gsub(/a/) + enum.should be_an_instance_of(Enumerator) + enum.to_a.should == ["a", "a"] + end + + describe "returned Enumerator" do + describe "size" do + it "should return nil" do + "abca".gsub(/a/).size.should == nil + end + end + end +end + +describe "String#gsub with a string pattern" do + it "handles multibyte characters" do + "é".gsub("é", "â").should == "â" + "aé".gsub("é", "â").should == "aâ" + "éa".gsub("é", "â").should == "âa" + end +end + +describe "String#gsub! with pattern and replacement" do + it "modifies self in place and returns self" do + a = "hello" + a.gsub!(/[aeiou]/, '*').should equal(a) + a.should == "h*ll*" + end + + it "modifies self in place with multi-byte characters and returns self" do + a = "¿por qué?" + a.gsub!(/([a-z\d]*)/, "*").should equal(a) + a.should == "*¿** **é*?*" + end + + it "returns nil if no modifications were made" do + a = "hello" + a.gsub!(/z/, '*').should == nil + a.gsub!(/z/, 'z').should == nil + a.should == "hello" + end + + # See [ruby-core:23666] + it "raises a FrozenError when self is frozen" do + s = "hello" + s.freeze + + -> { s.gsub!(/ROAR/, "x") }.should raise_error(FrozenError) + -> { s.gsub!(/e/, "e") }.should raise_error(FrozenError) + -> { s.gsub!(/[aeiou]/, '*') }.should raise_error(FrozenError) + end + + it "handles a pattern in a superset encoding" do + string = 'abc'.force_encoding(Encoding::US_ASCII) + + result = string.gsub!('é', 'è') + + result.should == nil + string.should == 'abc' + string.encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + string = 'été' + pattern = 't'.force_encoding(Encoding::US_ASCII) + + result = string.gsub!(pattern, 'u') + + result.should == string + string.should == 'éué' + string.encoding.should == Encoding::UTF_8 + end +end + +describe "String#gsub! 
with pattern and block" do + it "modifies self in place and returns self" do + a = "hello" + a.gsub!(/[aeiou]/) { '*' }.should equal(a) + a.should == "h*ll*" + end + + it "returns nil if no modifications were made" do + a = "hello" + a.gsub!(/z/) { '*' }.should == nil + a.gsub!(/z/) { 'z' }.should == nil + a.should == "hello" + end + + # See [ruby-core:23663] + it "raises a FrozenError when self is frozen" do + s = "hello" + s.freeze + + -> { s.gsub!(/ROAR/) { "x" } }.should raise_error(FrozenError) + -> { s.gsub!(/e/) { "e" } }.should raise_error(FrozenError) + -> { s.gsub!(/[aeiou]/) { '*' } }.should raise_error(FrozenError) + end + + it "uses the compatible encoding if they are compatible" do + s = "hello" + s2 = "#{195.chr}#{192.chr}#{195.chr}" + + s.gsub!(/l/) { |bar| 195.chr }.encoding.should == Encoding::BINARY + s2.gsub!("#{192.chr}") { |bar| "hello" }.encoding.should == Encoding::BINARY + end + + it "raises an Encoding::CompatibilityError if the encodings are not compatible" do + s = "hllëllo" + s2 = "hellö" + + -> { s.gsub!(/l/) { |bar| "Русский".force_encoding("iso-8859-5") } }.should raise_error(Encoding::CompatibilityError) + -> { s2.gsub!(/l/) { |bar| "Русский".force_encoding("iso-8859-5") } }.should raise_error(Encoding::CompatibilityError) + end + + it "replaces the incompatible part properly even if the encodings are not compatible" do + s = "hllëllo" + + s.gsub!(/ë/) { |bar| "Русский".force_encoding("iso-8859-5") }.encoding.should == Encoding::ISO_8859_5 + end + + not_supported_on :opal do + it "raises an ArgumentError if encoding is not valid" do + x92 = [0x92].pack('C').force_encoding('utf-8') + -> { "a#{x92}b".gsub!(/[^\x00-\x7f]/u, '') }.should raise_error(ArgumentError) + end + end +end + +describe "String#gsub! 
with pattern and without replacement and block" do + it "returns an enumerator" do + enum = "abca".gsub!(/a/) + enum.should be_an_instance_of(Enumerator) + enum.to_a.should == ["a", "a"] + end + + describe "returned Enumerator" do + describe "size" do + it "should return nil" do + "abca".gsub!(/a/).size.should == nil + end + end + end +end diff --git a/spec/ruby/core/string/hash_spec.rb b/spec/ruby/core/string/hash_spec.rb new file mode 100644 index 0000000000..0b26214b55 --- /dev/null +++ b/spec/ruby/core/string/hash_spec.rb @@ -0,0 +1,9 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#hash" do + it "returns a hash based on a string's length and content" do + "abc".hash.should == "abc".hash + "abc".hash.should_not == "cba".hash + end +end diff --git a/spec/ruby/core/string/hex_spec.rb b/spec/ruby/core/string/hex_spec.rb new file mode 100644 index 0000000000..364e915681 --- /dev/null +++ b/spec/ruby/core/string/hex_spec.rb @@ -0,0 +1,49 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +# TODO: Move actual results to String#to_int() and spec in terms of it +describe "String#hex" do + it "treats leading characters of self as a string of hex digits" do + "0a".hex.should == 10 + "0o".hex.should == 0 + "0x".hex.should == 0 + "A_BAD_BABE".hex.should == 0xABADBABE + "0b1010".hex.should == "b1010".hex + "0d500".hex.should == "d500".hex + "abcdefG".hex.should == 0xabcdef + end + + it "does not accept a sequence of underscores as part of a number" do + "a__b".hex.should == 0xa + "a____b".hex.should == 0xa + "a___f".hex.should == 0xa + end + + it "takes an optional sign" do + "-1234".hex.should == -4660 + "+1234".hex.should == 4660 + end + + it "takes an optional 0x" do + "0x0a".hex.should == 10 + "0a".hex.should == 10 + end + + it "requires that the sign is in front of the 0x if present" do + "-0x1".hex.should == -1 + "0x-1".hex.should == 0 + end + + it "returns 0 on error" do + "".hex.should == 
0 + "+-5".hex.should == 0 + "wombat".hex.should == 0 + "0x0x42".hex.should == 0 + end + + it "returns 0 if sequence begins with underscore" do + "_a".hex.should == 0 + "___b".hex.should == 0 + "___0xc".hex.should == 0 + end +end diff --git a/spec/ruby/core/string/include_spec.rb b/spec/ruby/core/string/include_spec.rb new file mode 100644 index 0000000000..9781140a55 --- /dev/null +++ b/spec/ruby/core/string/include_spec.rb @@ -0,0 +1,49 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#include? with String" do + it "returns true if self contains other_str" do + "hello".include?("lo").should == true + "hello".include?("ol").should == false + end + + it "ignores subclass differences" do + "hello".include?(StringSpecs::MyString.new("lo")).should == true + StringSpecs::MyString.new("hello").include?("lo").should == true + StringSpecs::MyString.new("hello").include?(StringSpecs::MyString.new("lo")).should == true + end + + it "returns true if both strings are empty" do + "".should.include?("") + "".dup.force_encoding("EUC-JP").should.include?("") + "".should.include?("".dup.force_encoding("EUC-JP")) + "".dup.force_encoding("EUC-JP").should.include?("".dup.force_encoding("EUC-JP")) + end + + it "returns true if the RHS is empty" do + "a".should.include?("") + "a".dup.force_encoding("EUC-JP").should.include?("") + "a".should.include?("".dup.force_encoding("EUC-JP")) + "a".dup.force_encoding("EUC-JP").should.include?("".dup.force_encoding("EUC-JP")) + end + + it "tries to convert other to string using to_str" do + other = mock('lo') + other.should_receive(:to_str).and_return("lo") + + "hello".include?(other).should == true + end + + it "raises a TypeError if other can't be converted to string" do + -> { "hello".include?([]) }.should raise_error(TypeError) + -> { "hello".include?('h'.ord) }.should raise_error(TypeError) + -> { "hello".include?(mock('x')) }.should raise_error(TypeError) + end + + it "raises an 
Encoding::CompatibilityError if the encodings are incompatible" do + pat = "ア".encode Encoding::EUC_JP + -> do + "あれ".include?(pat) + end.should raise_error(Encoding::CompatibilityError) + end +end diff --git a/spec/ruby/core/string/index_spec.rb b/spec/ruby/core/string/index_spec.rb new file mode 100644 index 0000000000..835263a2cd --- /dev/null +++ b/spec/ruby/core/string/index_spec.rb @@ -0,0 +1,350 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#index" do + it "raises a TypeError if passed nil" do + -> { "abc".index nil }.should raise_error(TypeError) + end + + it "raises a TypeError if passed a boolean" do + -> { "abc".index true }.should raise_error(TypeError) + end + + it "raises a TypeError if passed a Symbol" do + -> { "abc".index :a }.should raise_error(TypeError) + end + + it "calls #to_str to convert the first argument" do + char = mock("string index char") + char.should_receive(:to_str).and_return("b") + "abc".index(char).should == 1 + end + + it "calls #to_int to convert the second argument" do + offset = mock("string index offset") + offset.should_receive(:to_int).and_return(1) + "abc".index("c", offset).should == 2 + end + + it "raises a TypeError if passed an Integer" do + -> { "abc".index 97 }.should raise_error(TypeError) + end +end + +describe "String#index with String" do + it "behaves the same as String#index(char) for one-character strings" do + "blablabla hello cruel world...!".split("").uniq.each do |str| + chr = str[0] + str.index(str).should == str.index(chr) + + 0.upto(str.size + 1) do |start| + str.index(str, start).should == str.index(chr, start) + end + + (-str.size - 1).upto(-1) do |start| + str.index(str, start).should == str.index(chr, start) + end + end + end + + it "returns the index of the first occurrence of the given substring" do + "blablabla".index("").should == 0 + "blablabla".index("b").should == 0 + "blablabla".index("bla").should == 0 + 
"blablabla".index("blabla").should == 0 + "blablabla".index("blablabla").should == 0 + + "blablabla".index("l").should == 1 + "blablabla".index("la").should == 1 + "blablabla".index("labla").should == 1 + "blablabla".index("lablabla").should == 1 + + "blablabla".index("a").should == 2 + "blablabla".index("abla").should == 2 + "blablabla".index("ablabla").should == 2 + end + + it "doesn't set $~" do + $~ = nil + + 'hello.'.index('ll') + $~.should == nil + end + + it "ignores string subclasses" do + "blablabla".index(StringSpecs::MyString.new("bla")).should == 0 + StringSpecs::MyString.new("blablabla").index("bla").should == 0 + StringSpecs::MyString.new("blablabla").index(StringSpecs::MyString.new("bla")).should == 0 + end + + it "starts the search at the given offset" do + "blablabla".index("bl", 0).should == 0 + "blablabla".index("bl", 1).should == 3 + "blablabla".index("bl", 2).should == 3 + "blablabla".index("bl", 3).should == 3 + + "blablabla".index("bla", 0).should == 0 + "blablabla".index("bla", 1).should == 3 + "blablabla".index("bla", 2).should == 3 + "blablabla".index("bla", 3).should == 3 + + "blablabla".index("blab", 0).should == 0 + "blablabla".index("blab", 1).should == 3 + "blablabla".index("blab", 2).should == 3 + "blablabla".index("blab", 3).should == 3 + + "blablabla".index("la", 1).should == 1 + "blablabla".index("la", 2).should == 4 + "blablabla".index("la", 3).should == 4 + "blablabla".index("la", 4).should == 4 + + "blablabla".index("lab", 1).should == 1 + "blablabla".index("lab", 2).should == 4 + "blablabla".index("lab", 3).should == 4 + "blablabla".index("lab", 4).should == 4 + + "blablabla".index("ab", 2).should == 2 + "blablabla".index("ab", 3).should == 5 + "blablabla".index("ab", 4).should == 5 + "blablabla".index("ab", 5).should == 5 + + "blablabla".index("", 0).should == 0 + "blablabla".index("", 1).should == 1 + "blablabla".index("", 2).should == 2 + "blablabla".index("", 7).should == 7 + "blablabla".index("", 8).should == 8 + 
"blablabla".index("", 9).should == 9 + end + + it "starts the search at offset + self.length if offset is negative" do + str = "blablabla" + + ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle| + (-str.length .. -1).each do |offset| + str.index(needle, offset).should == + str.index(needle, offset + str.length) + end + end + end + + it "returns nil if the substring isn't found" do + "blablabla".index("B").should == nil + "blablabla".index("z").should == nil + "blablabla".index("BLA").should == nil + "blablabla".index("blablablabla").should == nil + "blablabla".index("", 10).should == nil + + "hello".index("he", 1).should == nil + "hello".index("he", 2).should == nil + "I’ve got a multibyte character.\n".index("\n\n").should == nil + end + + it "returns the character index of a multibyte character" do + "ありがとう".index("が").should == 2 + end + + it "returns the character index after offset" do + "われわれ".index("わ", 1).should == 2 + "ありがとうありがとう".index("が", 3).should == 7 + end + + it "returns the character index after a partial first match" do + "</</h".index("</h").should == 2 + end + + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + char = "れ".encode Encoding::EUC_JP + -> do + "あれ".index char + end.should raise_error(Encoding::CompatibilityError) + end + + it "handles a substring in a superset encoding" do + 'abc'.dup.force_encoding(Encoding::US_ASCII).index('é').should == nil + end + + it "handles a substring in a subset encoding" do + 'été'.index('t'.dup.force_encoding(Encoding::US_ASCII)).should == 1 + end + + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + str = 'abc'.dup.force_encoding("ISO-2022-JP") + pattern = 'b'.dup.force_encoding("EUC-JP") + + -> { str.index(pattern) }.should raise_error(Encoding::CompatibilityError, "incompatible character encodings: ISO-2022-JP and EUC-JP") + end +end + +describe "String#index with Regexp" do + it "behaves the same as String#index(string) for 
escaped string regexps" do + ["blablabla", "hello cruel world...!"].each do |str| + ["", "b", "bla", "lab", "o c", "d."].each do |needle| + regexp = Regexp.new(Regexp.escape(needle)) + str.index(regexp).should == str.index(needle) + + 0.upto(str.size + 1) do |start| + str.index(regexp, start).should == str.index(needle, start) + end + + (-str.size - 1).upto(-1) do |start| + str.index(regexp, start).should == str.index(needle, start) + end + end + end + end + + it "returns the index of the first match of regexp" do + "blablabla".index(/bla/).should == 0 + "blablabla".index(/BLA/i).should == 0 + + "blablabla".index(/.{0}/).should == 0 + "blablabla".index(/.{6}/).should == 0 + "blablabla".index(/.{9}/).should == 0 + + "blablabla".index(/.*/).should == 0 + "blablabla".index(/.+/).should == 0 + + "blablabla".index(/lab|b/).should == 0 + + not_supported_on :opal do + "blablabla".index(/\A/).should == 0 + "blablabla".index(/\Z/).should == 9 + "blablabla".index(/\z/).should == 9 + "blablabla\n".index(/\Z/).should == 9 + "blablabla\n".index(/\z/).should == 10 + end + + "blablabla".index(/^/).should == 0 + "\nblablabla".index(/^/).should == 0 + "b\nablabla".index(/$/).should == 1 + "bl\nablabla".index(/$/).should == 2 + + "blablabla".index(/.l./).should == 0 + end + + it "sets $~ to MatchData of match and nil when there's none" do + 'hello.'.index(/.(.)/) + $~[0].should == 'he' + + 'hello.'.index(/not/) + $~.should == nil + end + + ruby_bug "#20421", ""..."3.3" do + it "always clear $~" do + "a".index(/a/) + $~.should_not == nil + + string = "blablabla" + string.index(/bla/, string.length + 1) + $~.should == nil + end + end + + it "starts the search at the given offset" do + "blablabla".index(/.{0}/, 5).should == 5 + "blablabla".index(/.{1}/, 5).should == 5 + "blablabla".index(/.{2}/, 5).should == 5 + "blablabla".index(/.{3}/, 5).should == 5 + "blablabla".index(/.{4}/, 5).should == 5 + + "blablabla".index(/.{0}/, 3).should == 3 + "blablabla".index(/.{1}/, 3).should == 3 + 
"blablabla".index(/.{2}/, 3).should == 3 + "blablabla".index(/.{5}/, 3).should == 3 + "blablabla".index(/.{6}/, 3).should == 3 + + "blablabla".index(/.l./, 0).should == 0 + "blablabla".index(/.l./, 1).should == 3 + "blablabla".index(/.l./, 2).should == 3 + "blablabla".index(/.l./, 3).should == 3 + + "xblaxbla".index(/x./, 0).should == 0 + "xblaxbla".index(/x./, 1).should == 4 + "xblaxbla".index(/x./, 2).should == 4 + + not_supported_on :opal do + "blablabla\n".index(/\Z/, 9).should == 9 + end + end + + it "starts the search at offset + self.length if offset is negative" do + str = "blablabla" + + ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle| + (-str.length .. -1).each do |offset| + str.index(needle, offset).should == + str.index(needle, offset + str.length) + end + end + end + + it "returns nil if the substring isn't found" do + "blablabla".index(/BLA/).should == nil + + "blablabla".index(/.{10}/).should == nil + "blaxbla".index(/.x/, 3).should == nil + "blaxbla".index(/..x/, 2).should == nil + end + + it "returns nil if the Regexp matches the empty string and the offset is out of range" do + "ruby".index(//,12).should be_nil + end + + it "supports \\G which matches at the given start offset" do + "helloYOU.".index(/\GYOU/, 5).should == 5 + "helloYOU.".index(/\GYOU/).should == nil + + re = /\G.+YOU/ + # The # marks where \G will match. 
+ [ + ["#hi!YOUall.", 0], + ["h#i!YOUall.", 1], + ["hi#!YOUall.", 2], + ["hi!#YOUall.", nil] + ].each do |spec| + + start = spec[0].index("#") + str = spec[0].delete("#") + + str.index(re, start).should == spec[1] + end + end + + it "converts start_offset to an integer via to_int" do + obj = mock('1') + obj.should_receive(:to_int).and_return(1) + "RWOARW".index(/R./, obj).should == 4 + end + + it "returns the character index of a multibyte character" do + "ありがとう".index(/が/).should == 2 + end + + it "returns the character index after offset" do + "われわれ".index(/わ/, 1).should == 2 + end + + it "treats the offset as a character index" do + "われわわれ".index(/わ/, 3).should == 3 + end + + ruby_bug "#19763", ""..."3.3.0" do + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + re = Regexp.new "れ".encode(Encoding::EUC_JP) + -> do + "あれ".index re + end.should raise_error(Encoding::CompatibilityError, "incompatible encoding regexp match (EUC-JP regexp with UTF-8 string)") + end + end + + # The exception message was incorrectly "incompatible character encodings: UTF-8 and EUC-JP" before 3.3.0 + # Still test that the right exception class is used before that. 
+ it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + re = Regexp.new "れ".encode(Encoding::EUC_JP) + -> do + "あれ".index re + end.should raise_error(Encoding::CompatibilityError) + end +end diff --git a/spec/ruby/core/string/initialize_spec.rb b/spec/ruby/core/string/initialize_spec.rb new file mode 100644 index 0000000000..08734cc916 --- /dev/null +++ b/spec/ruby/core/string/initialize_spec.rb @@ -0,0 +1,26 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/replace' + +describe "String#initialize" do + it "is a private method" do + String.should have_private_instance_method(:initialize) + end + + describe "with no arguments" do + it "does not change self" do + s = "some string" + s.send :initialize + s.should == "some string" + end + + it "does not raise an exception when frozen" do + a = "hello".freeze + a.send(:initialize).should equal(a) + end + end + + describe "with an argument" do + it_behaves_like :string_replace, :initialize + end +end diff --git a/spec/ruby/core/string/insert_spec.rb b/spec/ruby/core/string/insert_spec.rb new file mode 100644 index 0000000000..483f3c9367 --- /dev/null +++ b/spec/ruby/core/string/insert_spec.rb @@ -0,0 +1,81 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#insert with index, other" do + it "inserts other before the character at the given index" do + "abcd".insert(0, 'X').should == "Xabcd" + "abcd".insert(3, 'X').should == "abcXd" + "abcd".insert(4, 'X').should == "abcdX" + end + + it "modifies self in place" do + a = "abcd" + a.insert(4, 'X').should == "abcdX" + a.should == "abcdX" + end + + it "inserts after the given character on an negative count" do + "abcd".insert(-5, 'X').should == "Xabcd" + "abcd".insert(-3, 'X').should == "abXcd" + "abcd".insert(-1, 'X').should == "abcdX" + end + + it "raises an IndexError if the index is 
beyond string" do + -> { "abcd".insert(5, 'X') }.should raise_error(IndexError) + -> { "abcd".insert(-6, 'X') }.should raise_error(IndexError) + end + + it "converts index to an integer using to_int" do + other = mock('-3') + other.should_receive(:to_int).and_return(-3) + + "abcd".insert(other, "XYZ").should == "abXYZcd" + end + + it "converts other to a string using to_str" do + other = mock('XYZ') + other.should_receive(:to_str).and_return("XYZ") + + "abcd".insert(-3, other).should == "abXYZcd" + end + + it "raises a TypeError if other can't be converted to string" do + -> { "abcd".insert(-6, Object.new)}.should raise_error(TypeError) + -> { "abcd".insert(-6, []) }.should raise_error(TypeError) + -> { "abcd".insert(-6, mock('x')) }.should raise_error(TypeError) + end + + it "raises a FrozenError if self is frozen" do + str = "abcd".freeze + -> { str.insert(4, '') }.should raise_error(FrozenError) + -> { str.insert(4, 'X') }.should raise_error(FrozenError) + end + + it "inserts a character into a multibyte encoded string" do + "ありがとう".insert(1, 'ü').should == "あüりがとう" + end + + it "returns a String in the compatible encoding" do + str = "".force_encoding(Encoding::US_ASCII) + str.insert(0, "ありがとう") + str.encoding.should == Encoding::UTF_8 + end + + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + pat = "ア".encode Encoding::EUC_JP + -> do + "あれ".insert 0, pat + end.should raise_error(Encoding::CompatibilityError) + end + + it "should not call subclassed string methods" do + cls = Class.new(String) do + def replace(arg) + raise "should not call replace" + end + end + cls.new("abcd").insert(0, 'X').should == "Xabcd" + end +end diff --git a/spec/ruby/core/string/inspect_spec.rb b/spec/ruby/core/string/inspect_spec.rb new file mode 100644 index 0000000000..15db06c7f5 --- /dev/null +++ b/spec/ruby/core/string/inspect_spec.rb @@ -0,0 +1,520 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 
'fixtures/classes' + +describe "String#inspect" do + it "does not return a subclass instance" do + StringSpecs::MyString.new.inspect.should be_an_instance_of(String) + end + + it "returns a string with special characters replaced with \\<char> notation" do + [ ["\a", '"\\a"'], + ["\b", '"\\b"'], + ["\t", '"\\t"'], + ["\n", '"\\n"'], + ["\v", '"\\v"'], + ["\f", '"\\f"'], + ["\r", '"\\r"'], + ["\e", '"\\e"'] + ].should be_computed_by(:inspect) + end + + it "returns a string with special characters replaced with \\<char> notation for UTF-16" do + pairs = [ + ["\a", '"\\a"'], + ["\b", '"\\b"'], + ["\t", '"\\t"'], + ["\n", '"\\n"'], + ["\v", '"\\v"'], + ["\f", '"\\f"'], + ["\r", '"\\r"'], + ["\e", '"\\e"'] + ].map { |str, result| [str.encode('UTF-16LE'), result] } + + pairs.should be_computed_by(:inspect) + end + + it "returns a string with \" and \\ escaped with a backslash" do + [ ["\"", '"\\""'], + ["\\", '"\\\\"'] + ].should be_computed_by(:inspect) + end + + it "returns a string with \\#<char> when # is followed by $, @, {" do + [ ["\#$", '"\\#$"'], + ["\#@", '"\\#@"'], + ["\#{", '"\\#{"'] + ].should be_computed_by(:inspect) + end + + it "returns a string with # not escaped when followed by any other character" do + [ ["#", '"#"'], + ["#1", '"#1"'] + ].should be_computed_by(:inspect) + end + + it "returns a string with printable non-alphanumeric characters unescaped" do + [ [" ", '" "'], + ["!", '"!"'], + ["$", '"$"'], + ["%", '"%"'], + ["&", '"&"'], + ["'", '"\'"'], + ["(", '"("'], + [")", '")"'], + ["*", '"*"'], + ["+", '"+"'], + [",", '","'], + ["-", '"-"'], + [".", '"."'], + ["/", '"/"'], + [":", '":"'], + [";", '";"'], + ["<", '"<"'], + ["=", '"="'], + [">", '">"'], + ["?", '"?"'], + ["@", '"@"'], + ["[", '"["'], + ["]", '"]"'], + ["^", '"^"'], + ["_", '"_"'], + ["`", '"`"'], + ["{", '"{"'], + ["|", '"|"'], + ["}", '"}"'], + ["~", '"~"'] + ].should be_computed_by(:inspect) + end + + it "returns a string with numeric characters unescaped" do + [ ["0", '"0"'], + 
["1", '"1"'], + ["2", '"2"'], + ["3", '"3"'], + ["4", '"4"'], + ["5", '"5"'], + ["6", '"6"'], + ["7", '"7"'], + ["8", '"8"'], + ["9", '"9"'], + ].should be_computed_by(:inspect) + end + + it "returns a string with upper-case alpha characters unescaped" do + [ ["A", '"A"'], + ["B", '"B"'], + ["C", '"C"'], + ["D", '"D"'], + ["E", '"E"'], + ["F", '"F"'], + ["G", '"G"'], + ["H", '"H"'], + ["I", '"I"'], + ["J", '"J"'], + ["K", '"K"'], + ["L", '"L"'], + ["M", '"M"'], + ["N", '"N"'], + ["O", '"O"'], + ["P", '"P"'], + ["Q", '"Q"'], + ["R", '"R"'], + ["S", '"S"'], + ["T", '"T"'], + ["U", '"U"'], + ["V", '"V"'], + ["W", '"W"'], + ["X", '"X"'], + ["Y", '"Y"'], + ["Z", '"Z"'] + ].should be_computed_by(:inspect) + end + + it "returns a string with lower-case alpha characters unescaped" do + [ ["a", '"a"'], + ["b", '"b"'], + ["c", '"c"'], + ["d", '"d"'], + ["e", '"e"'], + ["f", '"f"'], + ["g", '"g"'], + ["h", '"h"'], + ["i", '"i"'], + ["j", '"j"'], + ["k", '"k"'], + ["l", '"l"'], + ["m", '"m"'], + ["n", '"n"'], + ["o", '"o"'], + ["p", '"p"'], + ["q", '"q"'], + ["r", '"r"'], + ["s", '"s"'], + ["t", '"t"'], + ["u", '"u"'], + ["v", '"v"'], + ["w", '"w"'], + ["x", '"x"'], + ["y", '"y"'], + ["z", '"z"'] + ].should be_computed_by(:inspect) + end + + it "returns a string with non-printing characters replaced by \\x notation" do + # Avoid the file encoding by computing the string with #chr. 
+ [ [0001.chr, '"\\x01"'], + [0002.chr, '"\\x02"'], + [0003.chr, '"\\x03"'], + [0004.chr, '"\\x04"'], + [0005.chr, '"\\x05"'], + [0006.chr, '"\\x06"'], + [0016.chr, '"\\x0E"'], + [0017.chr, '"\\x0F"'], + [0020.chr, '"\\x10"'], + [0021.chr, '"\\x11"'], + [0022.chr, '"\\x12"'], + [0023.chr, '"\\x13"'], + [0024.chr, '"\\x14"'], + [0025.chr, '"\\x15"'], + [0026.chr, '"\\x16"'], + [0027.chr, '"\\x17"'], + [0030.chr, '"\\x18"'], + [0031.chr, '"\\x19"'], + [0032.chr, '"\\x1A"'], + [0034.chr, '"\\x1C"'], + [0035.chr, '"\\x1D"'], + [0036.chr, '"\\x1E"'], + [0037.chr, '"\\x1F"'], + [0177.chr, '"\\x7F"'], + [0200.chr, '"\\x80"'], + [0201.chr, '"\\x81"'], + [0202.chr, '"\\x82"'], + [0203.chr, '"\\x83"'], + [0204.chr, '"\\x84"'], + [0205.chr, '"\\x85"'], + [0206.chr, '"\\x86"'], + [0207.chr, '"\\x87"'], + [0210.chr, '"\\x88"'], + [0211.chr, '"\\x89"'], + [0212.chr, '"\\x8A"'], + [0213.chr, '"\\x8B"'], + [0214.chr, '"\\x8C"'], + [0215.chr, '"\\x8D"'], + [0216.chr, '"\\x8E"'], + [0217.chr, '"\\x8F"'], + [0220.chr, '"\\x90"'], + [0221.chr, '"\\x91"'], + [0222.chr, '"\\x92"'], + [0223.chr, '"\\x93"'], + [0224.chr, '"\\x94"'], + [0225.chr, '"\\x95"'], + [0226.chr, '"\\x96"'], + [0227.chr, '"\\x97"'], + [0230.chr, '"\\x98"'], + [0231.chr, '"\\x99"'], + [0232.chr, '"\\x9A"'], + [0233.chr, '"\\x9B"'], + [0234.chr, '"\\x9C"'], + [0235.chr, '"\\x9D"'], + [0236.chr, '"\\x9E"'], + [0237.chr, '"\\x9F"'], + [0240.chr, '"\\xA0"'], + [0241.chr, '"\\xA1"'], + [0242.chr, '"\\xA2"'], + [0243.chr, '"\\xA3"'], + [0244.chr, '"\\xA4"'], + [0245.chr, '"\\xA5"'], + [0246.chr, '"\\xA6"'], + [0247.chr, '"\\xA7"'], + [0250.chr, '"\\xA8"'], + [0251.chr, '"\\xA9"'], + [0252.chr, '"\\xAA"'], + [0253.chr, '"\\xAB"'], + [0254.chr, '"\\xAC"'], + [0255.chr, '"\\xAD"'], + [0256.chr, '"\\xAE"'], + [0257.chr, '"\\xAF"'], + [0260.chr, '"\\xB0"'], + [0261.chr, '"\\xB1"'], + [0262.chr, '"\\xB2"'], + [0263.chr, '"\\xB3"'], + [0264.chr, '"\\xB4"'], + [0265.chr, '"\\xB5"'], + [0266.chr, '"\\xB6"'], + [0267.chr, 
'"\\xB7"'], + [0270.chr, '"\\xB8"'], + [0271.chr, '"\\xB9"'], + [0272.chr, '"\\xBA"'], + [0273.chr, '"\\xBB"'], + [0274.chr, '"\\xBC"'], + [0275.chr, '"\\xBD"'], + [0276.chr, '"\\xBE"'], + [0277.chr, '"\\xBF"'], + [0300.chr, '"\\xC0"'], + [0301.chr, '"\\xC1"'], + [0302.chr, '"\\xC2"'], + [0303.chr, '"\\xC3"'], + [0304.chr, '"\\xC4"'], + [0305.chr, '"\\xC5"'], + [0306.chr, '"\\xC6"'], + [0307.chr, '"\\xC7"'], + [0310.chr, '"\\xC8"'], + [0311.chr, '"\\xC9"'], + [0312.chr, '"\\xCA"'], + [0313.chr, '"\\xCB"'], + [0314.chr, '"\\xCC"'], + [0315.chr, '"\\xCD"'], + [0316.chr, '"\\xCE"'], + [0317.chr, '"\\xCF"'], + [0320.chr, '"\\xD0"'], + [0321.chr, '"\\xD1"'], + [0322.chr, '"\\xD2"'], + [0323.chr, '"\\xD3"'], + [0324.chr, '"\\xD4"'], + [0325.chr, '"\\xD5"'], + [0326.chr, '"\\xD6"'], + [0327.chr, '"\\xD7"'], + [0330.chr, '"\\xD8"'], + [0331.chr, '"\\xD9"'], + [0332.chr, '"\\xDA"'], + [0333.chr, '"\\xDB"'], + [0334.chr, '"\\xDC"'], + [0335.chr, '"\\xDD"'], + [0336.chr, '"\\xDE"'], + [0337.chr, '"\\xDF"'], + [0340.chr, '"\\xE0"'], + [0341.chr, '"\\xE1"'], + [0342.chr, '"\\xE2"'], + [0343.chr, '"\\xE3"'], + [0344.chr, '"\\xE4"'], + [0345.chr, '"\\xE5"'], + [0346.chr, '"\\xE6"'], + [0347.chr, '"\\xE7"'], + [0350.chr, '"\\xE8"'], + [0351.chr, '"\\xE9"'], + [0352.chr, '"\\xEA"'], + [0353.chr, '"\\xEB"'], + [0354.chr, '"\\xEC"'], + [0355.chr, '"\\xED"'], + [0356.chr, '"\\xEE"'], + [0357.chr, '"\\xEF"'], + [0360.chr, '"\\xF0"'], + [0361.chr, '"\\xF1"'], + [0362.chr, '"\\xF2"'], + [0363.chr, '"\\xF3"'], + [0364.chr, '"\\xF4"'], + [0365.chr, '"\\xF5"'], + [0366.chr, '"\\xF6"'], + [0367.chr, '"\\xF7"'], + [0370.chr, '"\\xF8"'], + [0371.chr, '"\\xF9"'], + [0372.chr, '"\\xFA"'], + [0373.chr, '"\\xFB"'], + [0374.chr, '"\\xFC"'], + [0375.chr, '"\\xFD"'], + [0376.chr, '"\\xFE"'], + [0377.chr, '"\\xFF"'] + ].should be_computed_by(:inspect) + end + + it "returns a string with a NUL character replaced by \\x notation" do + 0.chr.inspect.should == '"\\x00"' + end + + it "uses \\x notation for 
broken UTF-8 sequences" do + "\xF0\x9F".inspect.should == '"\\xF0\\x9F"' + end + + it "works for broken US-ASCII strings" do + s = "©".dup.force_encoding("US-ASCII") + s.inspect.should == '"\xC2\xA9"' + end + + describe "when default external is UTF-8" do + before :each do + @extenc, Encoding.default_external = Encoding.default_external, Encoding::UTF_8 + end + + after :each do + Encoding.default_external = @extenc + end + + it "returns a string with non-printing characters replaced by \\u notation for Unicode strings" do + [ [0001.chr('utf-8'), '"\u0001"'], + [0002.chr('utf-8'), '"\u0002"'], + [0003.chr('utf-8'), '"\u0003"'], + [0004.chr('utf-8'), '"\u0004"'], + [0005.chr('utf-8'), '"\u0005"'], + [0006.chr('utf-8'), '"\u0006"'], + [0016.chr('utf-8'), '"\u000E"'], + [0017.chr('utf-8'), '"\u000F"'], + [0020.chr('utf-8'), '"\u0010"'], + [0021.chr('utf-8'), '"\u0011"'], + [0022.chr('utf-8'), '"\u0012"'], + [0023.chr('utf-8'), '"\u0013"'], + [0024.chr('utf-8'), '"\u0014"'], + [0025.chr('utf-8'), '"\u0015"'], + [0026.chr('utf-8'), '"\u0016"'], + [0027.chr('utf-8'), '"\u0017"'], + [0030.chr('utf-8'), '"\u0018"'], + [0031.chr('utf-8'), '"\u0019"'], + [0032.chr('utf-8'), '"\u001A"'], + [0034.chr('utf-8'), '"\u001C"'], + [0035.chr('utf-8'), '"\u001D"'], + [0036.chr('utf-8'), '"\u001E"'], + [0037.chr('utf-8'), '"\u001F"'], + [0177.chr('utf-8'), '"\u007F"'], + [0200.chr('utf-8'), '"\u0080"'], + [0201.chr('utf-8'), '"\u0081"'], + [0202.chr('utf-8'), '"\u0082"'], + [0203.chr('utf-8'), '"\u0083"'], + [0204.chr('utf-8'), '"\u0084"'], + [0206.chr('utf-8'), '"\u0086"'], + [0207.chr('utf-8'), '"\u0087"'], + [0210.chr('utf-8'), '"\u0088"'], + [0211.chr('utf-8'), '"\u0089"'], + [0212.chr('utf-8'), '"\u008A"'], + [0213.chr('utf-8'), '"\u008B"'], + [0214.chr('utf-8'), '"\u008C"'], + [0215.chr('utf-8'), '"\u008D"'], + [0216.chr('utf-8'), '"\u008E"'], + [0217.chr('utf-8'), '"\u008F"'], + [0220.chr('utf-8'), '"\u0090"'], + [0221.chr('utf-8'), '"\u0091"'], + [0222.chr('utf-8'), '"\u0092"'], 
+ [0223.chr('utf-8'), '"\u0093"'], + [0224.chr('utf-8'), '"\u0094"'], + [0225.chr('utf-8'), '"\u0095"'], + [0226.chr('utf-8'), '"\u0096"'], + [0227.chr('utf-8'), '"\u0097"'], + [0230.chr('utf-8'), '"\u0098"'], + [0231.chr('utf-8'), '"\u0099"'], + [0232.chr('utf-8'), '"\u009A"'], + [0233.chr('utf-8'), '"\u009B"'], + [0234.chr('utf-8'), '"\u009C"'], + [0235.chr('utf-8'), '"\u009D"'], + [0236.chr('utf-8'), '"\u009E"'], + [0237.chr('utf-8'), '"\u009F"'], + ].should be_computed_by(:inspect) + end + + it "returns a string with a NUL character replaced by \\u notation" do + 0.chr('utf-8').inspect.should == '"\\u0000"' + end + + it "returns a string with extended characters for Unicode strings" do + [ [0240.chr('utf-8'), '" "'], + [0241.chr('utf-8'), '"¡"'], + [0242.chr('utf-8'), '"¢"'], + [0243.chr('utf-8'), '"£"'], + [0244.chr('utf-8'), '"¤"'], + [0245.chr('utf-8'), '"¥"'], + [0246.chr('utf-8'), '"¦"'], + [0247.chr('utf-8'), '"§"'], + [0250.chr('utf-8'), '"¨"'], + [0251.chr('utf-8'), '"©"'], + [0252.chr('utf-8'), '"ª"'], + [0253.chr('utf-8'), '"«"'], + [0254.chr('utf-8'), '"¬"'], + [0255.chr('utf-8'), '""'], + [0256.chr('utf-8'), '"®"'], + [0257.chr('utf-8'), '"¯"'], + [0260.chr('utf-8'), '"°"'], + [0261.chr('utf-8'), '"±"'], + [0262.chr('utf-8'), '"²"'], + [0263.chr('utf-8'), '"³"'], + [0264.chr('utf-8'), '"´"'], + [0265.chr('utf-8'), '"µ"'], + [0266.chr('utf-8'), '"¶"'], + [0267.chr('utf-8'), '"·"'], + [0270.chr('utf-8'), '"¸"'], + [0271.chr('utf-8'), '"¹"'], + [0272.chr('utf-8'), '"º"'], + [0273.chr('utf-8'), '"»"'], + [0274.chr('utf-8'), '"¼"'], + [0275.chr('utf-8'), '"½"'], + [0276.chr('utf-8'), '"¾"'], + [0277.chr('utf-8'), '"¿"'], + [0300.chr('utf-8'), '"À"'], + [0301.chr('utf-8'), '"Á"'], + [0302.chr('utf-8'), '"Â"'], + [0303.chr('utf-8'), '"Ã"'], + [0304.chr('utf-8'), '"Ä"'], + [0305.chr('utf-8'), '"Å"'], + [0306.chr('utf-8'), '"Æ"'], + [0307.chr('utf-8'), '"Ç"'], + [0310.chr('utf-8'), '"È"'], + [0311.chr('utf-8'), '"É"'], + [0312.chr('utf-8'), '"Ê"'], + 
[0313.chr('utf-8'), '"Ë"'], + [0314.chr('utf-8'), '"Ì"'], + [0315.chr('utf-8'), '"Í"'], + [0316.chr('utf-8'), '"Î"'], + [0317.chr('utf-8'), '"Ï"'], + [0320.chr('utf-8'), '"Ð"'], + [0321.chr('utf-8'), '"Ñ"'], + [0322.chr('utf-8'), '"Ò"'], + [0323.chr('utf-8'), '"Ó"'], + [0324.chr('utf-8'), '"Ô"'], + [0325.chr('utf-8'), '"Õ"'], + [0326.chr('utf-8'), '"Ö"'], + [0327.chr('utf-8'), '"×"'], + [0330.chr('utf-8'), '"Ø"'], + [0331.chr('utf-8'), '"Ù"'], + [0332.chr('utf-8'), '"Ú"'], + [0333.chr('utf-8'), '"Û"'], + [0334.chr('utf-8'), '"Ü"'], + [0335.chr('utf-8'), '"Ý"'], + [0336.chr('utf-8'), '"Þ"'], + [0337.chr('utf-8'), '"ß"'], + [0340.chr('utf-8'), '"à"'], + [0341.chr('utf-8'), '"á"'], + [0342.chr('utf-8'), '"â"'], + [0343.chr('utf-8'), '"ã"'], + [0344.chr('utf-8'), '"ä"'], + [0345.chr('utf-8'), '"å"'], + [0346.chr('utf-8'), '"æ"'], + [0347.chr('utf-8'), '"ç"'], + [0350.chr('utf-8'), '"è"'], + [0351.chr('utf-8'), '"é"'], + [0352.chr('utf-8'), '"ê"'], + [0353.chr('utf-8'), '"ë"'], + [0354.chr('utf-8'), '"ì"'], + [0355.chr('utf-8'), '"í"'], + [0356.chr('utf-8'), '"î"'], + [0357.chr('utf-8'), '"ï"'], + [0360.chr('utf-8'), '"ð"'], + [0361.chr('utf-8'), '"ñ"'], + [0362.chr('utf-8'), '"ò"'], + [0363.chr('utf-8'), '"ó"'], + [0364.chr('utf-8'), '"ô"'], + [0365.chr('utf-8'), '"õ"'], + [0366.chr('utf-8'), '"ö"'], + [0367.chr('utf-8'), '"÷"'], + [0370.chr('utf-8'), '"ø"'], + [0371.chr('utf-8'), '"ù"'], + [0372.chr('utf-8'), '"ú"'], + [0373.chr('utf-8'), '"û"'], + [0374.chr('utf-8'), '"ü"'], + [0375.chr('utf-8'), '"ý"'], + [0376.chr('utf-8'), '"þ"'], + [0377.chr('utf-8'), '"ÿ"'] + ].should be_computed_by(:inspect) + end + end + + describe "when the string's encoding is different than the result's encoding" do + describe "and the string's encoding is ASCII-compatible but the characters are non-ASCII" do + it "returns a string with the non-ASCII characters replaced by \\x notation" do + "\u{3042}".encode("EUC-JP").inspect.should == '"\\x{A4A2}"' + end + end + + describe "and the string 
has both ASCII-compatible and ASCII-incompatible chars" do + it "returns a string with the non-ASCII characters replaced by \\u notation" do + "hello привет".encode("utf-16le").inspect.should == '"hello \\u043F\\u0440\\u0438\\u0432\\u0435\\u0442"' + end + end + end +end diff --git a/spec/ruby/core/string/intern_spec.rb b/spec/ruby/core/string/intern_spec.rb new file mode 100644 index 0000000000..cd7dad4359 --- /dev/null +++ b/spec/ruby/core/string/intern_spec.rb @@ -0,0 +1,7 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/to_sym' + +describe "String#intern" do + it_behaves_like :string_to_sym, :intern +end diff --git a/spec/ruby/core/string/length_spec.rb b/spec/ruby/core/string/length_spec.rb new file mode 100644 index 0000000000..98cee1f03d --- /dev/null +++ b/spec/ruby/core/string/length_spec.rb @@ -0,0 +1,7 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/length' + +describe "String#length" do + it_behaves_like :string_length, :length +end diff --git a/spec/ruby/core/string/lines_spec.rb b/spec/ruby/core/string/lines_spec.rb new file mode 100644 index 0000000000..40ab5f71d8 --- /dev/null +++ b/spec/ruby/core/string/lines_spec.rb @@ -0,0 +1,19 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/each_line' + +describe "String#lines" do + it_behaves_like :string_each_line, :lines + + it "returns an array when no block given" do + ary = "hello world".lines(' ') + ary.should == ["hello ", "world"] + end + + context "when `chomp` keyword argument is passed" do + it "removes new line characters" do + "hello \nworld\n".lines(chomp: true).should == ["hello ", "world"] + "hello \r\nworld\r\n".lines(chomp: true).should == ["hello ", "world"] + end + end +end diff --git a/spec/ruby/core/string/ljust_spec.rb b/spec/ruby/core/string/ljust_spec.rb new file mode 100644 index 0000000000..47324c59d2 --- /dev/null +++ 
b/spec/ruby/core/string/ljust_spec.rb @@ -0,0 +1,100 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#ljust with length, padding" do + it "returns a new string of specified length with self left justified and padded with padstr" do + "hello".ljust(20, '1234').should == "hello123412341234123" + + "".ljust(1, "abcd").should == "a" + "".ljust(2, "abcd").should == "ab" + "".ljust(3, "abcd").should == "abc" + "".ljust(4, "abcd").should == "abcd" + "".ljust(6, "abcd").should == "abcdab" + + "OK".ljust(3, "abcd").should == "OKa" + "OK".ljust(4, "abcd").should == "OKab" + "OK".ljust(6, "abcd").should == "OKabcd" + "OK".ljust(8, "abcd").should == "OKabcdab" + end + + it "pads with whitespace if no padstr is given" do + "hello".ljust(20).should == "hello " + end + + it "returns self if it's longer than or as long as the specified length" do + "".ljust(0).should == "" + "".ljust(-1).should == "" + "hello".ljust(4).should == "hello" + "hello".ljust(-1).should == "hello" + "this".ljust(3).should == "this" + "radiology".ljust(8, '-').should == "radiology" + end + + it "tries to convert length to an integer using to_int" do + "^".ljust(3.8, "_^").should == "^_^" + + obj = mock('3') + obj.should_receive(:to_int).and_return(3) + + "o".ljust(obj, "_o").should == "o_o" + end + + it "raises a TypeError when length can't be converted to an integer" do + -> { "hello".ljust("x") }.should raise_error(TypeError) + -> { "hello".ljust("x", "y") }.should raise_error(TypeError) + -> { "hello".ljust([]) }.should raise_error(TypeError) + -> { "hello".ljust(mock('x')) }.should raise_error(TypeError) + end + + it "tries to convert padstr to a string using to_str" do + padstr = mock('123') + padstr.should_receive(:to_str).and_return("123") + + "hello".ljust(10, padstr).should == "hello12312" + end + + it "raises a TypeError when padstr can't be converted" do + -> { "hello".ljust(20, []) }.should raise_error(TypeError) + -> 
{ "hello".ljust(20, Object.new)}.should raise_error(TypeError) + -> { "hello".ljust(20, mock('x')) }.should raise_error(TypeError) + end + + it "raises an ArgumentError when padstr is empty" do + -> { "hello".ljust(10, '') }.should raise_error(ArgumentError) + end + + it "returns String instances when called on subclasses" do + StringSpecs::MyString.new("").ljust(10).should be_an_instance_of(String) + StringSpecs::MyString.new("foo").ljust(10).should be_an_instance_of(String) + StringSpecs::MyString.new("foo").ljust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) + + "".ljust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) + "foo".ljust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) + end + + describe "with width" do + it "returns a String in the same encoding as the original" do + str = "abc".dup.force_encoding Encoding::IBM437 + result = str.ljust 5 + result.should == "abc " + result.encoding.should equal(Encoding::IBM437) + end + end + + describe "with width, pattern" do + it "returns a String in the compatible encoding" do + str = "abc".dup.force_encoding Encoding::IBM437 + result = str.ljust 5, "あ" + result.should == "abcああ" + result.encoding.should equal(Encoding::UTF_8) + end + + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + pat = "ア".encode Encoding::EUC_JP + -> do + "あれ".ljust 5, pat + end.should raise_error(Encoding::CompatibilityError) + end + end +end diff --git a/spec/ruby/core/string/lstrip_spec.rb b/spec/ruby/core/string/lstrip_spec.rb new file mode 100644 index 0000000000..c83650207e --- /dev/null +++ b/spec/ruby/core/string/lstrip_spec.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/strip' + +describe "String#lstrip" do + it_behaves_like :string_strip, :lstrip + + it "returns a copy of self with leading whitespace removed" do + " hello 
".lstrip.should == "hello " + " hello world ".lstrip.should == "hello world " + "\n\r\t\n\v\r hello world ".lstrip.should == "hello world " + "hello".lstrip.should == "hello" + " こにちわ".lstrip.should == "こにちわ" + end + + it "works with lazy substrings" do + " hello "[1...-1].lstrip.should == "hello " + " hello world "[1...-1].lstrip.should == "hello world " + "\n\r\t\n\v\r hello world "[1...-1].lstrip.should == "hello world " + " こにちわ "[1...-1].lstrip.should == "こにちわ" + end + + it "strips leading \\0" do + "\x00hello".lstrip.should == "hello" + "\000 \000hello\000 \000".lstrip.should == "hello\000 \000" + end +end + +describe "String#lstrip!" do + it "modifies self in place and returns self" do + a = " hello " + a.lstrip!.should equal(a) + a.should == "hello " + end + + it "returns nil if no modifications were made" do + a = "hello" + a.lstrip!.should == nil + a.should == "hello" + end + + it "makes a string empty if it is only whitespace" do + "".lstrip!.should == nil + " ".lstrip.should == "" + " ".lstrip.should == "" + end + + it "removes leading NULL bytes and whitespace" do + a = "\000 \000hello\000 \000" + a.lstrip! + a.should == "hello\000 \000" + end + + it "raises a FrozenError on a frozen instance that is modified" do + -> { " hello ".freeze.lstrip! }.should raise_error(FrozenError) + end + + # see [ruby-core:23657] + it "raises a FrozenError on a frozen instance that would not be modified" do + -> { "hello".freeze.lstrip! }.should raise_error(FrozenError) + -> { "".freeze.lstrip! }.should raise_error(FrozenError) + end + + it "raises an ArgumentError if the first non-space codepoint is invalid" do + s = "\xDFabc".force_encoding(Encoding::UTF_8) + s.valid_encoding?.should be_false + -> { s.lstrip! }.should raise_error(ArgumentError) + + s = " \xDFabc".force_encoding(Encoding::UTF_8) + s.valid_encoding?.should be_false + -> { s.lstrip! 
}.should raise_error(ArgumentError) + end +end diff --git a/spec/ruby/core/string/match_spec.rb b/spec/ruby/core/string/match_spec.rb new file mode 100644 index 0000000000..5e988f34ca --- /dev/null +++ b/spec/ruby/core/string/match_spec.rb @@ -0,0 +1,167 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe :string_match_escaped_literal, shared: true do + not_supported_on :opal do + it "matches a literal Regexp that uses ASCII-only UTF-8 escape sequences" do + "a b".match(/([\u{20}-\u{7e}])/)[0].should == "a" + end + end +end + +describe "String#=~" do + it "behaves the same way as index() when given a regexp" do + ("rudder" =~ /udder/).should == "rudder".index(/udder/) + ("boat" =~ /[^fl]oat/).should == "boat".index(/[^fl]oat/) + ("bean" =~ /bag/).should == "bean".index(/bag/) + ("true" =~ /false/).should == "true".index(/false/) + end + + it "raises a TypeError if a obj is a string" do + -> { "some string" =~ "another string" }.should raise_error(TypeError) + -> { "a" =~ StringSpecs::MyString.new("b") }.should raise_error(TypeError) + end + + it "invokes obj.=~ with self if obj is neither a string nor regexp" do + str = "w00t" + obj = mock('x') + + obj.should_receive(:=~).with(str).any_number_of_times.and_return(true) + str.should =~ obj + + obj = mock('y') + obj.should_receive(:=~).with(str).any_number_of_times.and_return(false) + str.should_not =~ obj + end + + it "sets $~ to MatchData when there is a match and nil when there's none" do + 'hello' =~ /./ + $~[0].should == 'h' + + 'hello' =~ /not/ + $~.should == nil + end + + it "returns the character index of a found match" do + ("こにちわ" =~ /に/).should == 1 + end + +end + +describe "String#match" do + it "matches the pattern against self" do + 'hello'.match(/(.)\1/)[0].should == 'll' + end + + it_behaves_like :string_match_escaped_literal, :match + + describe "with [pattern, position]" do + describe "when given a positive position" do + it "matches 
the pattern against self starting at an optional index" do + "01234".match(/(.).(.)/, 1).captures.should == ["1", "3"] + end + + it "uses the start as a character offset" do + "零一二三四".match(/(.).(.)/, 1).captures.should == ["一", "三"] + end + end + + describe "when given a negative position" do + it "matches the pattern against self starting at an optional index" do + "01234".match(/(.).(.)/, -4).captures.should == ["1", "3"] + end + + it "uses the start as a character offset" do + "零一二三四".match(/(.).(.)/, -4).captures.should == ["一", "三"] + end + end + end + + describe "when passed a block" do + it "yields the MatchData" do + "abc".match(/./) {|m| ScratchPad.record m } + ScratchPad.recorded.should be_kind_of(MatchData) + end + + it "returns the block result" do + "abc".match(/./) { :result }.should == :result + end + + it "does not yield if there is no match" do + ScratchPad.record [] + "b".match(/a/) {|m| ScratchPad << m } + ScratchPad.recorded.should == [] + end + end + + it "tries to convert pattern to a string via to_str" do + obj = mock('.') + def obj.to_str() "." end + "hello".match(obj)[0].should == "h" + + obj = mock('.') + def obj.respond_to?(type, *) true end + def obj.method_missing(*args) "." 
end + "hello".match(obj)[0].should == "h" + end + + it "raises a TypeError if pattern is not a regexp or a string" do + -> { 'hello'.match(10) }.should raise_error(TypeError) + not_supported_on :opal do + -> { 'hello'.match(:ell) }.should raise_error(TypeError) + end + end + + it "converts string patterns to regexps without escaping" do + 'hello'.match('(.)\1')[0].should == 'll' + end + + it "returns nil if there's no match" do + 'hello'.match('xx').should == nil + end + + it "matches \\G at the start of the string" do + 'hello'.match(/\Gh/)[0].should == 'h' + 'hello'.match(/\Go/).should == nil + end + + it "sets $~ to MatchData of match or nil when there is none" do + 'hello'.match(/./) + $~[0].should == 'h' + Regexp.last_match[0].should == 'h' + + 'hello'.match(/X/) + $~.should == nil + Regexp.last_match.should == nil + end + + it "calls match on the regular expression" do + regexp = /./.dup + regexp.should_receive(:match).and_return(:foo) + 'hello'.match(regexp).should == :foo + end +end + +describe "String#match?" 
do + before :each do + # Resetting Regexp.last_match + /DONTMATCH/.match '' + end + + context "when matches the given regex" do + it "returns true but does not set Regexp.last_match" do + 'string'.match?(/string/i).should be_true + Regexp.last_match.should be_nil + end + end + + it "returns false when does not match the given regex" do + 'string'.match?(/STRING/).should be_false + end + + it "takes matching position as the 2nd argument" do + 'string'.match?(/str/i, 0).should be_true + 'string'.match?(/str/i, 1).should be_false + end +end diff --git a/spec/ruby/core/string/modulo_spec.rb b/spec/ruby/core/string/modulo_spec.rb new file mode 100644 index 0000000000..46e0aa0f36 --- /dev/null +++ b/spec/ruby/core/string/modulo_spec.rb @@ -0,0 +1,797 @@ +require_relative '../../spec_helper' +require_relative '../kernel/shared/sprintf' +require_relative '../kernel/shared/sprintf_encoding' +require_relative 'fixtures/classes' +require_relative '../../shared/hash/key_error' + +describe "String#%" do + it_behaves_like :kernel_sprintf, -> format, *args { + format % args + } + + it_behaves_like :kernel_sprintf_encoding, -> format, *args { + format % args + } +end + +# TODO: these specs are mostly redundant with kernel/shared/sprintf.rb specs. +# These specs should be moved there and deduplicated. +describe "String#%" do + context "when key is missing from passed-in hash" do + it_behaves_like :key_error, -> obj, key { "%{#{key}}" % obj }, { a: 5 } + end + + it "formats multiple expressions" do + ("%b %x %d %s" % [10, 10, 10, 10]).should == "1010 a 10 10" + end + + it "formats expressions mid string" do + ("hello %s!" % "world").should == "hello world!" + end + + it "formats %% into %" do + ("%d%% %s" % [10, "of chickens!"]).should == "10% of chickens!" 
+ end + + describe "output's encoding" do + it "is the same as the format string if passed value is encoding-compatible" do + [Encoding::BINARY, Encoding::US_ASCII, Encoding::UTF_8, Encoding::SHIFT_JIS].each do |encoding| + ("hello %s!".encode(encoding) % "world").encoding.should == encoding + end + end + + it "negotiates a compatible encoding if necessary" do + ("hello %s" % 195.chr).encoding.should == Encoding::BINARY + ("hello %s".encode("shift_jis") % "wörld").encoding.should == Encoding::UTF_8 + end + + it "raises if a compatible encoding can't be found" do + -> { "hello %s".encode("utf-8") % "world".encode("UTF-16LE") }.should raise_error(Encoding::CompatibilityError) + end + end + + it "raises an error if single % appears at the end" do + -> { ("%" % []) }.should raise_error(ArgumentError) + -> { ("foo%" % [])}.should raise_error(ArgumentError) + end + + ruby_version_is ""..."3.4" do + it "formats single % character before a newline as literal %" do + ("%\n" % []).should == "%\n" + ("foo%\n" % []).should == "foo%\n" + ("%\n.3f" % 1.2).should == "%\n.3f" + end + + it "formats single % character before a NUL as literal %" do + ("%\0" % []).should == "%\0" + ("foo%\0" % []).should == "foo%\0" + ("%\0.3f" % 1.2).should == "%\0.3f" + end + + it "raises an error if single % appears anywhere else" do + -> { (" % " % []) }.should raise_error(ArgumentError) + -> { ("foo%quux" % []) }.should raise_error(ArgumentError) + end + + it "raises an error if NULL or \\n appear anywhere else in the format string" do + begin + old_debug, $DEBUG = $DEBUG, false + + -> { "%.\n3f" % 1.2 }.should raise_error(ArgumentError) + -> { "%.3\nf" % 1.2 }.should raise_error(ArgumentError) + -> { "%.\03f" % 1.2 }.should raise_error(ArgumentError) + -> { "%.3\0f" % 1.2 }.should raise_error(ArgumentError) + ensure + $DEBUG = old_debug + end + end + end + + ruby_version_is "3.4" do + it "raises an ArgumentError if % is not followed by a conversion specifier" do + -> { "%" % [] }.should 
raise_error(ArgumentError) + -> { "%\n" % [] }.should raise_error(ArgumentError) + -> { "%\0" % [] }.should raise_error(ArgumentError) + -> { " % " % [] }.should raise_error(ArgumentError) + -> { "%.\n3f" % 1.2 }.should raise_error(ArgumentError) + -> { "%.3\nf" % 1.2 }.should raise_error(ArgumentError) + -> { "%.\03f" % 1.2 }.should raise_error(ArgumentError) + -> { "%.3\0f" % 1.2 }.should raise_error(ArgumentError) + end + end + + it "ignores unused arguments when $DEBUG is false" do + begin + old_debug = $DEBUG + $DEBUG = false + + ("" % [1, 2, 3]).should == "" + ("%s" % [1, 2, 3]).should == "1" + ensure + $DEBUG = old_debug + end + end + + it "raises an ArgumentError for unused arguments when $DEBUG is true" do + begin + old_debug = $DEBUG + $DEBUG = true + s = $stderr + $stderr = IOStub.new + + -> { "" % [1, 2, 3] }.should raise_error(ArgumentError) + -> { "%s" % [1, 2, 3] }.should raise_error(ArgumentError) + ensure + $DEBUG = old_debug + $stderr = s + end + end + + it "always allows unused arguments when positional argument style is used" do + begin + old_debug = $DEBUG + $DEBUG = false + + ("%2$s" % [1, 2, 3]).should == "2" + $DEBUG = true + ("%2$s" % [1, 2, 3]).should == "2" + ensure + $DEBUG = old_debug + end + end + + ruby_version_is ""..."3.4" do + it "replaces trailing absolute argument specifier without type with percent sign" do + ("hello %1$" % "foo").should == "hello %" + end + end + + ruby_version_is "3.4" do + it "raises an ArgumentError if absolute argument specifier is followed by a conversion specifier" do + -> { "hello %1$" % "foo" }.should raise_error(ArgumentError) + end + end + + it "raises an ArgumentError when given invalid argument specifiers" do + -> { "%1" % [] }.should raise_error(ArgumentError) + -> { "%+" % [] }.should raise_error(ArgumentError) + -> { "%-" % [] }.should raise_error(ArgumentError) + -> { "%#" % [] }.should raise_error(ArgumentError) + -> { "%0" % [] }.should raise_error(ArgumentError) + -> { "%*" % [] }.should 
raise_error(ArgumentError) + -> { "%." % [] }.should raise_error(ArgumentError) + -> { "%_" % [] }.should raise_error(ArgumentError) + -> { "%0$s" % "x" }.should raise_error(ArgumentError) + -> { "%*0$s" % [5, "x"] }.should raise_error(ArgumentError) + -> { "%*1$.*0$1$s" % [1, 2, 3] }.should raise_error(ArgumentError) + end + + it "raises an ArgumentError when multiple positional argument tokens are given for one format specifier" do + -> { "%1$1$s" % "foo" }.should raise_error(ArgumentError) + end + + it "respects positional arguments and precision tokens given for one format specifier" do + ("%2$1d" % [1, 0]).should == "0" + ("%2$1d" % [0, 1]).should == "1" + + ("%2$.2f" % [1, 0]).should == "0.00" + ("%2$.2f" % [0, 1]).should == "1.00" + end + + it "allows more than one digit of position" do + ("%50$d" % (0..100).to_a).should == "49" + end + + it "raises an ArgumentError when multiple width star tokens are given for one format specifier" do + -> { "%**s" % [5, 5, 5] }.should raise_error(ArgumentError) + end + + it "raises an ArgumentError when a width star token is seen after a width token" do + -> { "%5*s" % [5, 5] }.should raise_error(ArgumentError) + end + + it "raises an ArgumentError when multiple precision tokens are given" do + -> { "%.5.5s" % 5 }.should raise_error(ArgumentError) + -> { "%.5.*s" % [5, 5] }.should raise_error(ArgumentError) + -> { "%.*.5s" % [5, 5] }.should raise_error(ArgumentError) + end + + it "raises an ArgumentError when there are less arguments than format specifiers" do + ("foo" % []).should == "foo" + -> { "%s" % [] }.should raise_error(ArgumentError) + -> { "%s %s" % [1] }.should raise_error(ArgumentError) + end + + it "raises an ArgumentError when absolute and relative argument numbers are mixed" do + -> { "%s %1$s" % "foo" }.should raise_error(ArgumentError) + -> { "%1$s %s" % "foo" }.should raise_error(ArgumentError) + + -> { "%s %2$s" % ["foo", "bar"] }.should raise_error(ArgumentError) + -> { "%2$s %s" % ["foo", "bar"] 
}.should raise_error(ArgumentError) + + -> { "%*2$s" % [5, 5, 5] }.should raise_error(ArgumentError) + -> { "%*.*2$s" % [5, 5, 5] }.should raise_error(ArgumentError) + -> { "%*2$.*2$s" % [5, 5, 5] }.should raise_error(ArgumentError) + -> { "%*.*2$s" % [5, 5, 5] }.should raise_error(ArgumentError) + end + + it "allows reuse of the one argument multiple via absolute argument numbers" do + ("%1$s %1$s" % "foo").should == "foo foo" + ("%1$s %2$s %1$s %2$s" % ["foo", "bar"]).should == "foo bar foo bar" + end + + it "always interprets an array argument as a list of argument parameters" do + -> { "%p" % [] }.should raise_error(ArgumentError) + ("%p" % [1]).should == "1" + ("%p %p" % [1, 2]).should == "1 2" + end + + it "always interprets an array subclass argument as a list of argument parameters" do + -> { "%p" % StringSpecs::MyArray[] }.should raise_error(ArgumentError) + ("%p" % StringSpecs::MyArray[1]).should == "1" + ("%p %p" % StringSpecs::MyArray[1, 2]).should == "1 2" + end + + it "allows positional arguments for width star and precision star arguments" do + ("%*1$.*2$3$d" % [10, 5, 1]).should == " 00001" + end + + it "allows negative width to imply '-' flag" do + ("%*1$.*2$3$d" % [-10, 5, 1]).should == "00001 " + ("%-*1$.*2$3$d" % [10, 5, 1]).should == "00001 " + ("%-*1$.*2$3$d" % [-10, 5, 1]).should == "00001 " + end + + it "ignores negative precision" do + ("%*1$.*2$3$d" % [10, -5, 1]).should == " 1" + end + + it "allows a star to take an argument number to use as the width" do + ("%1$*2$s" % ["a", 8]).should == " a" + ("%1$*10$s" % ["a",0,0,0,0,0,0,0,0,8]).should == " a" + end + + it "calls to_int on width star and precision star tokens" do + w = mock('10') + w.should_receive(:to_int).and_return(10) + + p = mock('5') + p.should_receive(:to_int).and_return(5) + + ("%*.*f" % [w, p, 1]).should == " 1.00000" + + + w = mock('10') + w.should_receive(:to_int).and_return(10) + + p = mock('5') + p.should_receive(:to_int).and_return(5) + + ("%*.*d" % [w, p, 1]).should 
== " 00001" + end + + it "does not call #to_a to convert the argument" do + x = mock("string modulo to_a") + x.should_not_receive(:to_a) + x.should_receive(:to_s).and_return("x") + + ("%s" % x).should == "x" + end + + it "calls #to_ary to convert the argument" do + x = mock("string modulo to_ary") + x.should_not_receive(:to_s) + x.should_receive(:to_ary).and_return(["x"]) + + ("%s" % x).should == "x" + end + + it "wraps the object in an Array if #to_ary returns nil" do + x = mock("string modulo to_ary") + x.should_receive(:to_ary).and_return(nil) + x.should_receive(:to_s).and_return("x") + + ("%s" % x).should == "x" + end + + it "raises a TypeError if #to_ary does not return an Array" do + x = mock("string modulo to_ary") + x.should_receive(:to_ary).and_return("x") + + -> { "%s" % x }.should raise_error(TypeError) + end + + it "tries to convert the argument to Array by calling #to_ary" do + obj = mock('[1,2]') + def obj.to_ary() [1, 2] end + def obj.to_s() "obj" end + ("%s %s" % obj).should == "1 2" + ("%s" % obj).should == "1" + end + + it "doesn't return subclass instances when called on a subclass" do + universal = mock('0') + def universal.to_int() 0 end + def universal.to_str() "0" end + def universal.to_f() 0.0 end + + [ + "", "foo", + "%b", "%B", "%c", "%d", "%e", "%E", + "%f", "%g", "%G", "%i", "%o", "%p", + "%s", "%u", "%x", "%X" + ].each do |format| + (StringSpecs::MyString.new(format) % universal).should be_an_instance_of(String) + end + end + + it "supports binary formats using %b for positive numbers" do + ("%b" % 10).should == "1010" + ("% b" % 10).should == " 1010" + ("%1$b" % [10, 20]).should == "1010" + ("%#b" % 10).should == "0b1010" + ("%+b" % 10).should == "+1010" + ("%-9b" % 10).should == "1010 " + ("%05b" % 10).should == "01010" + ("%*b" % [10, 6]).should == " 110" + ("%*b" % [-10, 6]).should == "110 " + ("%.4b" % 2).should == "0010" + ("%.32b" % 2147483648).should == "10000000000000000000000000000000" + end + + it "supports binary formats 
using %b for negative numbers" do + ("%b" % -5).should == "..1011" + ("%0b" % -5).should == "..1011" + ("%.1b" % -5).should == "..1011" + ("%.7b" % -5).should == "..11011" + ("%.10b" % -5).should == "..11111011" + ("% b" % -5).should == "-101" + ("%+b" % -5).should == "-101" + not_supported_on :opal do + ("%b" % -(2 ** 64 + 5)).should == + "..101111111111111111111111111111111111111111111111111111111111111011" + end + end + + it "supports binary formats using %B with same behaviour as %b except for using 0B instead of 0b for #" do + ("%B" % 10).should == ("%b" % 10) + ("% B" % 10).should == ("% b" % 10) + ("%1$B" % [10, 20]).should == ("%1$b" % [10, 20]) + ("%+B" % 10).should == ("%+b" % 10) + ("%-9B" % 10).should == ("%-9b" % 10) + ("%05B" % 10).should == ("%05b" % 10) + ("%*B" % [10, 6]).should == ("%*b" % [10, 6]) + ("%*B" % [-10, 6]).should == ("%*b" % [-10, 6]) + + ("%B" % -5).should == ("%b" % -5) + ("%0B" % -5).should == ("%0b" % -5) + ("%.1B" % -5).should == ("%.1b" % -5) + ("%.7B" % -5).should == ("%.7b" % -5) + ("%.10B" % -5).should == ("%.10b" % -5) + ("% B" % -5).should == ("% b" % -5) + ("%+B" % -5).should == ("%+b" % -5) + not_supported_on :opal do + ("%B" % -(2 ** 64 + 5)).should == ("%b" % -(2 ** 64 + 5)) + end + + ("%#B" % 10).should == "0B1010" + end + + it "supports character formats using %c" do + ("%c" % 10).should == "\n" + ("%2$c" % [10, 11, 14]).should == "\v" + ("%-4c" % 10).should == "\n " + ("%*c" % [10, 3]).should == " \003" + ("%c" % 42).should == "*" + + -> { "%c" % Object }.should raise_error(TypeError) + end + + it "supports single character strings as argument for %c" do + ("%c" % 'A').should == "A" + end + + it "supports only the first character as argument for %c" do + ("%c" % 'AA').should == "A" + end + + it "calls to_str on argument for %c formats" do + obj = mock('A') + obj.should_receive(:to_str).and_return('A') + + ("%c" % obj).should == "A" + end + + it "calls #to_ary on argument for %c formats" do + obj = mock('65') + 
obj.should_receive(:to_ary).and_return([65]) + ("%c" % obj).should == ("%c" % [65]) + end + + it "calls #to_int on argument for %c formats, if the argument does not respond to #to_ary" do + obj = mock('65') + obj.should_receive(:to_int).and_return(65) + + ("%c" % obj).should == ("%c" % 65) + end + + %w(d i).each do |f| + format = "%" + f + + it "supports integer formats using #{format}" do + ("%#{f}" % 10).should == "10" + ("% #{f}" % 10).should == " 10" + ("%1$#{f}" % [10, 20]).should == "10" + ("%+#{f}" % 10).should == "+10" + ("%-7#{f}" % 10).should == "10 " + ("%04#{f}" % 10).should == "0010" + ("%*#{f}" % [10, 4]).should == " 4" + ("%6.4#{f}" % 123).should == " 0123" + end + + it "supports negative integers using #{format}" do + ("%#{f}" % -5).should == "-5" + ("%3#{f}" % -5).should == " -5" + ("%03#{f}" % -5).should == "-05" + ("%+03#{f}" % -5).should == "-05" + ("%+.2#{f}" % -5).should == "-05" + ("%-3#{f}" % -5).should == "-5 " + ("%6.4#{f}" % -123).should == " -0123" + end + + it "supports negative integers using #{format}, giving priority to `-`" do + ("%-03#{f}" % -5).should == "-5 " + ("%+-03#{f}" % -5).should == "-5 " + end + end + + it "supports float formats using %e" do + ("%e" % 10).should == "1.000000e+01" + ("% e" % 10).should == " 1.000000e+01" + ("%1$e" % 10).should == "1.000000e+01" + ("%#e" % 10).should == "1.000000e+01" + ("%+e" % 10).should == "+1.000000e+01" + ("%-7e" % 10).should == "1.000000e+01" + ("%05e" % 10).should == "1.000000e+01" + ("%*e" % [10, 9]).should == "9.000000e+00" + end + + it "supports float formats using %e, but Inf, -Inf, and NaN are not floats" do + ("%e" % 1e1020).should == "Inf" + ("%e" % -1e1020).should == "-Inf" + ("%e" % -Float::NAN).should == "NaN" + ("%e" % Float::NAN).should == "NaN" + end + + it "supports float formats using %E, but Inf, -Inf, and NaN are not floats" do + ("%E" % 1e1020).should == "Inf" + ("%E" % -1e1020).should == "-Inf" + ("%-10E" % 1e1020).should == "Inf " + ("%10E" % 1e1020).should == " 
Inf" + ("%+E" % 1e1020).should == "+Inf" + ("% E" % 1e1020).should == " Inf" + ("%E" % Float::NAN).should == "NaN" + ("%E" % -Float::NAN).should == "NaN" + end + + it "supports float formats using %E" do + ("%E" % 10).should == "1.000000E+01" + ("% E" % 10).should == " 1.000000E+01" + ("%1$E" % 10).should == "1.000000E+01" + ("%#E" % 10).should == "1.000000E+01" + ("%+E" % 10).should == "+1.000000E+01" + ("%-7E" % 10).should == "1.000000E+01" + ("%05E" % 10).should == "1.000000E+01" + ("%*E" % [10, 9]).should == "9.000000E+00" + end + + it "pads with spaces for %E with Inf, -Inf, and NaN" do + ("%010E" % -1e1020).should == " -Inf" + ("%010E" % 1e1020).should == " Inf" + ("%010E" % Float::NAN).should == " NaN" + end + + it "supports float formats using %f" do + ("%f" % 10).should == "10.000000" + ("% f" % 10).should == " 10.000000" + ("%1$f" % 10).should == "10.000000" + ("%#f" % 10).should == "10.000000" + ("%#0.3f" % 10).should == "10.000" + ("%+f" % 10).should == "+10.000000" + ("%-7f" % 10).should == "10.000000" + ("%05f" % 10).should == "10.000000" + ("%0.5f" % 10).should == "10.00000" + ("%*f" % [10, 9]).should == " 9.000000" + end + + it "supports float formats using %g" do + ("%g" % 10).should == "10" + ("% g" % 10).should == " 10" + ("%1$g" % 10).should == "10" + ("%#g" % 10).should == "10.0000" + ("%#.3g" % 10).should == "10.0" + ("%+g" % 10).should == "+10" + ("%-7g" % 10).should == "10 " + ("%05g" % 10).should == "00010" + ("%g" % 10**10).should == "1e+10" + ("%*g" % [10, 9]).should == " 9" + end + + it "supports float formats using %G" do + ("%G" % 10).should == "10" + ("% G" % 10).should == " 10" + ("%1$G" % 10).should == "10" + ("%#G" % 10).should == "10.0000" + ("%#.3G" % 10).should == "10.0" + ("%+G" % 10).should == "+10" + ("%-7G" % 10).should == "10 " + ("%05G" % 10).should == "00010" + ("%G" % 10**10).should == "1E+10" + ("%*G" % [10, 9]).should == " 9" + end + + it "supports octal formats using %o for positive numbers" do + ("%o" % 10).should == 
"12" + ("% o" % 10).should == " 12" + ("%1$o" % [10, 20]).should == "12" + ("%#o" % 10).should == "012" + ("%+o" % 10).should == "+12" + ("%-9o" % 10).should == "12 " + ("%05o" % 10).should == "00012" + ("%*o" % [10, 6]).should == " 6" + end + + it "supports octal formats using %o for negative numbers" do + # These are incredibly wrong. -05 == -5, not 7177777...whatever + ("%o" % -5).should == "..73" + ("%0o" % -5).should == "..73" + ("%.4o" % 20).should == "0024" + ("%.1o" % -5).should == "..73" + ("%.7o" % -5).should == "..77773" + ("%.10o" % -5).should == "..77777773" + + ("% o" % -26).should == "-32" + ("%+o" % -26).should == "-32" + not_supported_on :opal do + ("%o" % -(2 ** 64 + 5)).should == "..75777777777777777777773" + end + end + + it "supports inspect formats using %p" do + ("%p" % 10).should == "10" + ("%1$p" % [10, 5]).should == "10" + ("%-22p" % 10).should == "10 " + ("%*p" % [10, 10]).should == " 10" + ("%p" % {capture: 1}).should == {capture: 1}.inspect + ("%p" % "str").should == "\"str\"" + end + + it "calls inspect on arguments for %p format" do + obj = mock('obj') + def obj.inspect() "obj" end + ("%p" % obj).should == "obj" + + # undef is not working + # obj = mock('obj') + # class << obj; undef :inspect; end + # def obj.method_missing(*args) "obj" end + # ("%p" % obj).should == "obj" + end + + it "supports string formats using %s" do + ("%s" % "hello").should == "hello" + ("%s" % "").should == "" + ("%s" % 10).should == "10" + ("%1$s" % [10, 8]).should == "10" + ("%-5s" % 10).should == "10 " + ("%*s" % [10, 9]).should == " 9" + end + + it "respects a space padding request not as part of the width" do + x = "% -5s" % ["foo"] + x.should == "foo " + end + + it "calls to_s on non-String arguments for %s format" do + obj = mock('obj') + def obj.to_s() "obj" end + + ("%s" % obj).should == "obj" + + # undef doesn't work + # obj = mock('obj') + # class << obj; undef :to_s; end + # def obj.method_missing(*args) "obj" end + # + # ("%s" % obj).should == 
"obj" + end + + # MRI crashes on this one. + # See http://groups.google.com/group/ruby-core-google/t/c285c18cd94c216d + it "raises an ArgumentError for huge precisions for %s" do + block = -> { "%.25555555555555555555555555555555555555s" % "hello world" } + block.should raise_error(ArgumentError) + end + + # Note: %u has been changed to an alias for %d in 1.9. + it "supports unsigned formats using %u" do + ("%u" % 10).should == "10" + ("% u" % 10).should == " 10" + ("%1$u" % [10, 20]).should == "10" + ("%+u" % 10).should == "+10" + ("%-7u" % 10).should == "10 " + ("%04u" % 10).should == "0010" + ("%*u" % [10, 4]).should == " 4" + end + + it "formats negative values with a leading sign using %u" do + ("% u" % -26).should == "-26" + ("%+u" % -26).should == "-26" + end + + it "supports negative bignums with %u or %d" do + ("%u" % -(2 ** 64 + 5)).should == "-18446744073709551621" + ("%d" % -(2 ** 64 + 5)).should == "-18446744073709551621" + end + + it "supports hex formats using %x for positive numbers" do + ("%x" % 10).should == "a" + ("% x" % 10).should == " a" + ("%1$x" % [10, 20]).should == "a" + ("%#x" % 10).should == "0xa" + ("%+x" % 10).should == "+a" + ("%-9x" % 10).should == "a " + ("%05x" % 10).should == "0000a" + ("%*x" % [10, 6]).should == " 6" + ("%.4x" % 20).should == "0014" + ("%x" % 0xFFFFFFFF).should == "ffffffff" + end + + it "supports hex formats using %x for negative numbers" do + ("%x" % -5).should == "..fb" + ("%0x" % -5).should == "..fb" + ("%.1x" % -5).should == "..fb" + ("%.7x" % -5).should == "..ffffb" + ("%.10x" % -5).should == "..fffffffb" + ("% x" % -26).should == "-1a" + ("%+x" % -26).should == "-1a" + not_supported_on :opal do + ("%x" % -(2 ** 64 + 5)).should == "..fefffffffffffffffb" + end + end + + it "supports hex formats using %X for positive numbers" do + ("%X" % 10).should == "A" + ("% X" % 10).should == " A" + ("%1$X" % [10, 20]).should == "A" + ("%#X" % 10).should == "0XA" + ("%+X" % 10).should == "+A" + ("%-9X" % 10).should == "A 
" + ("%05X" % 10).should == "0000A" + ("%*X" % [10, 6]).should == " 6" + ("%X" % 0xFFFFFFFF).should == "FFFFFFFF" + end + + it "supports hex formats using %X for negative numbers" do + ("%X" % -5).should == "..FB" + ("%0X" % -5).should == "..FB" + ("%.1X" % -5).should == "..FB" + ("%.7X" % -5).should == "..FFFFB" + ("%.10X" % -5).should == "..FFFFFFFB" + ("% X" % -26).should == "-1A" + ("%+X" % -26).should == "-1A" + not_supported_on :opal do + ("%X" % -(2 ** 64 + 5)).should == "..FEFFFFFFFFFFFFFFFB" + end + end + + it "formats zero without prefix using %#x" do + ("%#x" % 0).should == "0" + end + + it "formats zero without prefix using %#X" do + ("%#X" % 0).should == "0" + end + + %w(b d i o u x X).each do |f| + format = "%" + f + + it "behaves as if calling Kernel#Integer for #{format} argument, if it does not respond to #to_ary" do + (format % "10").should == (format % Kernel.Integer("10")) + (format % "0x42").should == (format % Kernel.Integer("0x42")) + (format % "0b1101").should == (format % Kernel.Integer("0b1101")) + (format % "0b1101_0000").should == (format % Kernel.Integer("0b1101_0000")) + (format % "0777").should == (format % Kernel.Integer("0777")) + -> { + # see [ruby-core:14139] for more details + (format % "0777").should == (format % Kernel.Integer("0777")) + }.should_not raise_error(ArgumentError) + + -> { format % "0__7_7_7" }.should raise_error(ArgumentError) + + -> { format % "" }.should raise_error(ArgumentError) + -> { format % "x" }.should raise_error(ArgumentError) + -> { format % "5x" }.should raise_error(ArgumentError) + -> { format % "08" }.should raise_error(ArgumentError) + -> { format % "0b2" }.should raise_error(ArgumentError) + -> { format % "123__456" }.should raise_error(ArgumentError) + + obj = mock('5') + obj.should_receive(:to_i).and_return(5) + (format % obj).should == (format % 5) + + obj = mock('6') + obj.stub!(:to_i).and_return(5) + obj.should_receive(:to_int).and_return(6) + (format % obj).should == (format % 6) + end + end 
+ + %w(e E f g G).each do |f| + format = "%" + f + + it "tries to convert the passed argument to an Array using #to_ary" do + obj = mock('3.14') + obj.should_receive(:to_ary).and_return([3.14]) + (format % obj).should == (format % [3.14]) + end + + it "behaves as if calling Kernel#Float for #{format} arguments, when the passed argument does not respond to #to_ary" do + (format % 10).should == (format % 10.0) + (format % "-10.4e-20").should == (format % -10.4e-20) + (format % ".5").should == (format % 0.5) + (format % "-.5").should == (format % -0.5) + + ruby_version_is "3.4" do + (format % "10.").should == (format % 10) + end + + # Something's strange with this spec: + # it works just fine in individual mode, but not when run as part of a group + (format % "10_1_0.5_5_5").should == (format % 1010.555) + + (format % "0777").should == (format % 777) + + -> { format % "" }.should raise_error(ArgumentError) + -> { format % "x" }.should raise_error(ArgumentError) + -> { format % "." }.should raise_error(ArgumentError) + -> { format % "5x" }.should raise_error(ArgumentError) + -> { format % "0b1" }.should raise_error(ArgumentError) + -> { format % "10e10.5" }.should raise_error(ArgumentError) + -> { format % "10__10" }.should raise_error(ArgumentError) + -> { format % "10.10__10" }.should raise_error(ArgumentError) + + obj = mock('5.0') + obj.should_receive(:to_f).and_return(5.0) + (format % obj).should == (format % 5.0) + end + + it "behaves as if calling Kernel#Float for #{format} arguments, when the passed argument is hexadecimal string" do + (format % "0xA").should == (format % 0xA) + end + end + + describe "when format string contains %{} sections" do + it "replaces %{} sections with values from passed-in hash" do + ("%{foo}bar" % {foo: 'oof'}).should == "oofbar" + end + + it "should raise ArgumentError if no hash given" do + -> {"%{foo}" % []}.should raise_error(ArgumentError) + end + end + + describe "when format string contains %<> formats" do + it "uses the 
named argument for the format's value" do + ("%<foo>d" % {foo: 1}).should == "1" + end + + it "raises KeyError if key is missing from passed-in hash" do + -> {"%<foo>d" % {}}.should raise_error(KeyError) + end + + it "should raise ArgumentError if no hash given" do + -> {"%<foo>" % []}.should raise_error(ArgumentError) + end + end +end diff --git a/spec/ruby/core/string/multiply_spec.rb b/spec/ruby/core/string/multiply_spec.rb new file mode 100644 index 0000000000..c15f670c46 --- /dev/null +++ b/spec/ruby/core/string/multiply_spec.rb @@ -0,0 +1,7 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative '../../shared/string/times' + +describe "String#*" do + it_behaves_like :string_times, :*, -> str, times { str * times } +end diff --git a/spec/ruby/core/string/new_spec.rb b/spec/ruby/core/string/new_spec.rb new file mode 100644 index 0000000000..ca678f5323 --- /dev/null +++ b/spec/ruby/core/string/new_spec.rb @@ -0,0 +1,61 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String.new" do + it "returns an instance of String" do + str = String.new + str.should be_an_instance_of(String) + end + + it "accepts an encoding argument" do + xA4xA2 = [0xA4, 0xA2].pack('CC').force_encoding 'utf-8' + str = String.new(xA4xA2, encoding: 'euc-jp') + str.encoding.should == Encoding::EUC_JP + end + + it "accepts a capacity argument" do + String.new("", capacity: 100_000).should == "" + String.new("abc", capacity: 100_000).should == "abc" + end + + it "returns a fully-formed String" do + str = String.new + str.size.should == 0 + str << "more" + str.should == "more" + end + + it "returns a new string given a string argument" do + str1 = "test" + str = String.new(str1) + str.should be_an_instance_of(String) + str.should == str1 + str << "more" + str.should == "testmore" + end + + it "returns an instance of a subclass" do + a = StringSpecs::MyString.new("blah") + a.should 
be_an_instance_of(StringSpecs::MyString) + a.should == "blah" + end + + it "is called on subclasses" do + s = StringSpecs::SubString.new + s.special.should == nil + s.should == "" + + s = StringSpecs::SubString.new "subclass" + s.special.should == "subclass" + s.should == "" + end + + it "raises TypeError on inconvertible object" do + -> { String.new 5 }.should raise_error(TypeError) + -> { String.new nil }.should raise_error(TypeError) + end + + it "returns a binary String" do + String.new.encoding.should == Encoding::BINARY + end +end diff --git a/spec/ruby/core/string/next_spec.rb b/spec/ruby/core/string/next_spec.rb new file mode 100644 index 0000000000..fcd3e5ef90 --- /dev/null +++ b/spec/ruby/core/string/next_spec.rb @@ -0,0 +1,11 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/succ' + +describe "String#next" do + it_behaves_like :string_succ, :next +end + +describe "String#next!" do + it_behaves_like :string_succ_bang, :"next!" +end diff --git a/spec/ruby/core/string/oct_spec.rb b/spec/ruby/core/string/oct_spec.rb new file mode 100644 index 0000000000..7637692217 --- /dev/null +++ b/spec/ruby/core/string/oct_spec.rb @@ -0,0 +1,88 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +# Note: We can't completely spec this in terms of to_int() because oct() +# allows the base to be changed by a base specifier in the string. 
+# See http://groups.google.com/group/ruby-core-google/browse_frm/thread/b53e9c2003425703 +describe "String#oct" do + it "treats numeric digits as base-8 digits by default" do + "0".oct.should == 0 + "77".oct.should == 077 + "077".oct.should == 077 + end + + it "accepts numbers formatted as binary" do + "0b1010".oct.should == 0b1010 + end + + it "accepts numbers formatted as hexadecimal" do + "0xFF".oct.should == 0xFF + end + + it "accepts numbers formatted as decimal" do + "0d500".oct.should == 500 + end + + describe "with a leading minus sign" do + it "treats numeric digits as base-8 digits by default" do + "-12348".oct.should == -01234 + end + + it "accepts numbers formatted as binary" do + "-0b0101".oct.should == -0b0101 + end + + it "accepts numbers formatted as hexadecimal" do + "-0xEE".oct.should == -0xEE + end + + it "accepts numbers formatted as decimal" do + "-0d500".oct.should == -500 + end + end + + describe "with a leading plus sign" do + it "treats numeric digits as base-8 digits by default" do + "+12348".oct.should == 01234 + end + + it "accepts numbers formatted as binary" do + "+0b1010".oct.should == 0b1010 + end + + it "accepts numbers formatted as hexadecimal" do + "+0xFF".oct.should == 0xFF + end + + it "accepts numbers formatted as decimal" do + "+0d500".oct.should == 500 + end + end + + it "accepts a single underscore separating digits" do + "755_333".oct.should == 0755_333 + end + + it "does not accept a sequence of underscores as part of a number" do + "7__3".oct.should == 07 + "7___3".oct.should == 07 + "7__5".oct.should == 07 + end + + it "ignores characters that are incorrect for the base-8 digits" do + "0o".oct.should == 0 + "5678".oct.should == 0567 + end + + it "returns 0 if no characters can be interpreted as a base-8 number" do + "".oct.should == 0 + "+-5".oct.should == 0 + "wombat".oct.should == 0 + end + + it "returns 0 for strings with leading underscores" do + "_7".oct.should == 0 + "_07".oct.should == 0 + " _7".oct.should == 0 + 
end +end diff --git a/spec/ruby/core/string/ord_spec.rb b/spec/ruby/core/string/ord_spec.rb new file mode 100644 index 0000000000..35af3b5458 --- /dev/null +++ b/spec/ruby/core/string/ord_spec.rb @@ -0,0 +1,33 @@ +require_relative '../../spec_helper' + +describe "String#ord" do + it "returns an Integer" do + 'a'.ord.should be_an_instance_of(Integer) + end + + it "returns the codepoint of the first character in the String" do + 'a'.ord.should == 97 + end + + + it "ignores subsequent characters" do + "\u{287}a".ord.should == "\u{287}".ord + end + + it "understands multibyte characters" do + "\u{9879}".ord.should == 39033 + end + + it "is equivalent to #codepoints.first" do + "\u{981}\u{982}".ord.should == "\u{981}\u{982}".codepoints.first + end + + it "raises an ArgumentError if called on an empty String" do + -> { ''.ord }.should raise_error(ArgumentError) + end + + it "raises ArgumentError if the character is broken" do + s = "©".dup.force_encoding("US-ASCII") + -> { s.ord }.should raise_error(ArgumentError, "invalid byte sequence in US-ASCII") + end +end diff --git a/spec/ruby/core/string/partition_spec.rb b/spec/ruby/core/string/partition_spec.rb new file mode 100644 index 0000000000..d5370dcc73 --- /dev/null +++ b/spec/ruby/core/string/partition_spec.rb @@ -0,0 +1,63 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/partition' + +describe "String#partition with String" do + it_behaves_like :string_partition, :partition + + it "returns an array of substrings based on splitting on the given string" do + "hello world".partition("o").should == ["hell", "o", " world"] + end + + it "always returns 3 elements" do + "hello".partition("x").should == ["hello", "", ""] + "hello".partition("hello").should == ["", "hello", ""] + end + + it "accepts regexp" do + "hello!".partition(/l./).should == ["he", "ll", "o!"] + end + + it "sets global vars if regexp used" do + "hello!".partition(/(.l)(.o)/) + $1.should == "el" + 
$2.should == "lo" + end + + it "converts its argument using :to_str" do + find = mock('l') + find.should_receive(:to_str).and_return("l") + "hello".partition(find).should == ["he","l","lo"] + end + + it "raises an error if not convertible to string" do + ->{ "hello".partition(5) }.should raise_error(TypeError) + ->{ "hello".partition(nil) }.should raise_error(TypeError) + end + + it "takes precedence over a given block" do + "hello world".partition("o") { true }.should == ["hell", "o", " world"] + end + + it "handles a pattern in a superset encoding" do + string = "hello".dup.force_encoding(Encoding::US_ASCII) + + result = string.partition("é") + + result.should == ["hello", "", ""] + result[0].encoding.should == Encoding::US_ASCII + result[1].encoding.should == Encoding::US_ASCII + result[2].encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + pattern = "o".dup.force_encoding(Encoding::US_ASCII) + + result = "héllo world".partition(pattern) + + result.should == ["héll", "o", " world"] + result[0].encoding.should == Encoding::UTF_8 + result[1].encoding.should == Encoding::US_ASCII + result[2].encoding.should == Encoding::UTF_8 + end +end diff --git a/spec/ruby/core/string/plus_spec.rb b/spec/ruby/core/string/plus_spec.rb new file mode 100644 index 0000000000..9da17451c6 --- /dev/null +++ b/spec/ruby/core/string/plus_spec.rb @@ -0,0 +1,37 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/concat' + +describe "String#+" do + it_behaves_like :string_concat_encoding, :+ + it_behaves_like :string_concat_type_coercion, :+ + + it "returns a new string containing the given string concatenated to self" do + ("" + "").should == "" + ("" + "Hello").should == "Hello" + ("Hello" + "").should == "Hello" + ("Ruby !" 
+ "= Rubinius").should == "Ruby != Rubinius" + end + + it "converts any non-String argument with #to_str" do + c = mock 'str' + c.should_receive(:to_str).any_number_of_times.and_return(' + 1 = 2') + + ("1" + c).should == '1 + 1 = 2' + end + + it "raises a TypeError when given any object that fails #to_str" do + -> { "" + Object.new }.should raise_error(TypeError) + -> { "" + 65 }.should raise_error(TypeError) + end + + it "doesn't return subclass instances" do + (StringSpecs::MyString.new("hello") + "").should be_an_instance_of(String) + (StringSpecs::MyString.new("hello") + "foo").should be_an_instance_of(String) + (StringSpecs::MyString.new("hello") + StringSpecs::MyString.new("foo")).should be_an_instance_of(String) + (StringSpecs::MyString.new("hello") + StringSpecs::MyString.new("")).should be_an_instance_of(String) + (StringSpecs::MyString.new("") + StringSpecs::MyString.new("")).should be_an_instance_of(String) + ("hello" + StringSpecs::MyString.new("foo")).should be_an_instance_of(String) + ("hello" + StringSpecs::MyString.new("")).should be_an_instance_of(String) + end +end diff --git a/spec/ruby/core/string/prepend_spec.rb b/spec/ruby/core/string/prepend_spec.rb new file mode 100644 index 0000000000..5248ea8056 --- /dev/null +++ b/spec/ruby/core/string/prepend_spec.rb @@ -0,0 +1,55 @@ +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#prepend" do + it "prepends the given argument to self and returns self" do + str = "world" + str.prepend("hello ").should equal(str) + str.should == "hello world" + end + + it "converts the given argument to a String using to_str" do + obj = mock("hello") + obj.should_receive(:to_str).and_return("hello") + a = " world!".prepend(obj) + a.should == "hello world!" 
+ end + + it "raises a TypeError if the given argument can't be converted to a String" do + -> { "hello ".prepend [] }.should raise_error(TypeError) + -> { 'hello '.prepend mock('x') }.should raise_error(TypeError) + end + + it "raises a FrozenError when self is frozen" do + a = "hello" + a.freeze + + -> { a.prepend "" }.should raise_error(FrozenError) + -> { a.prepend "test" }.should raise_error(FrozenError) + end + + it "works when given a subclass instance" do + a = " world" + a.prepend StringSpecs::MyString.new("hello") + a.should == "hello world" + end + + it "takes multiple arguments" do + str = " world" + str.prepend "he", "", "llo" + str.should == "hello world" + end + + it "prepends the initial value when given arguments contain 2 self" do + str = "hello" + str.prepend str, str + str.should == "hellohellohello" + end + + it "returns self when given no arguments" do + str = "hello" + str.prepend.should equal(str) + str.should == "hello" + end +end diff --git a/spec/ruby/core/string/replace_spec.rb b/spec/ruby/core/string/replace_spec.rb new file mode 100644 index 0000000000..ef9bab4f3c --- /dev/null +++ b/spec/ruby/core/string/replace_spec.rb @@ -0,0 +1,7 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/replace' + +describe "String#replace" do + it_behaves_like :string_replace, :replace +end diff --git a/spec/ruby/core/string/reverse_spec.rb b/spec/ruby/core/string/reverse_spec.rb new file mode 100644 index 0000000000..aa6abe6036 --- /dev/null +++ b/spec/ruby/core/string/reverse_spec.rb @@ -0,0 +1,70 @@ +# encoding: utf-8 +# frozen_string_literal: false + +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#reverse" do + it "returns a new string with the characters of self in reverse order" do + "stressed".reverse.should == "desserts" + "m".reverse.should == "m" + "".reverse.should == "" + end + + it "returns String instances when called on a subclass" do + 
StringSpecs::MyString.new("stressed").reverse.should be_an_instance_of(String) + StringSpecs::MyString.new("m").reverse.should be_an_instance_of(String) + StringSpecs::MyString.new("").reverse.should be_an_instance_of(String) + end + + it "reverses a string with multi byte characters" do + "微軟正黑體".reverse.should == "體黑正軟微" + end + + it "works with a broken string" do + str = "微軟\xDF\xDE正黑體".force_encoding(Encoding::UTF_8) + + str.valid_encoding?.should be_false + + str.reverse.should == "體黑正\xDE\xDF軟微" + end + + it "returns a String in the same encoding as self" do + "stressed".encode("US-ASCII").reverse.encoding.should == Encoding::US_ASCII + end +end + +describe "String#reverse!" do + it "reverses self in place and always returns self" do + a = "stressed" + a.reverse!.should equal(a) + a.should == "desserts" + + "".reverse!.should == "" + end + + it "raises a FrozenError on a frozen instance that is modified" do + -> { "anna".freeze.reverse! }.should raise_error(FrozenError) + -> { "hello".freeze.reverse! }.should raise_error(FrozenError) + end + + # see [ruby-core:23666] + it "raises a FrozenError on a frozen instance that would not be modified" do + -> { "".freeze.reverse! }.should raise_error(FrozenError) + end + + it "reverses a string with multi byte characters" do + str = "微軟正黑體" + str.reverse! + str.should == "體黑正軟微" + end + + it "works with a broken string" do + str = "微軟\xDF\xDE正黑體".force_encoding(Encoding::UTF_8) + + str.valid_encoding?.should be_false + str.reverse! 
+ + str.should == "體黑正\xDE\xDF軟微" + end +end diff --git a/spec/ruby/core/string/rindex_spec.rb b/spec/ruby/core/string/rindex_spec.rb new file mode 100644 index 0000000000..0863a9c3be --- /dev/null +++ b/spec/ruby/core/string/rindex_spec.rb @@ -0,0 +1,384 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#rindex with object" do + it "raises a TypeError if obj isn't a String or Regexp" do + not_supported_on :opal do + -> { "hello".rindex(:sym) }.should raise_error(TypeError) + end + -> { "hello".rindex(mock('x')) }.should raise_error(TypeError) + end + + it "raises a TypeError if obj is an Integer" do + -> { "hello".rindex(42) }.should raise_error(TypeError) + end + + it "doesn't try to convert obj to an integer via to_int" do + obj = mock('x') + obj.should_not_receive(:to_int) + -> { "hello".rindex(obj) }.should raise_error(TypeError) + end + + it "tries to convert obj to a string via to_str" do + obj = mock('lo') + def obj.to_str() "lo" end + "hello".rindex(obj).should == "hello".rindex("lo") + + obj = mock('o') + def obj.respond_to?(arg, *) true end + def obj.method_missing(*args) "o" end + "hello".rindex(obj).should == "hello".rindex("o") + end +end + +describe "String#rindex with String" do + it "behaves the same as String#rindex(char) for one-character strings" do + "blablabla hello cruel world...!".split("").uniq.each do |str| + chr = str[0] + str.rindex(str).should == str.rindex(chr) + + 0.upto(str.size + 1) do |start| + str.rindex(str, start).should == str.rindex(chr, start) + end + + (-str.size - 1).upto(-1) do |start| + str.rindex(str, start).should == str.rindex(chr, start) + end + end + end + + it "behaves the same as String#rindex(?char) for one-character strings" do + "blablabla hello cruel world...!".split("").uniq.each do |str| + chr = str[0] =~ / / ? 
str[0] : eval("?#{str[0]}") + str.rindex(str).should == str.rindex(chr) + + 0.upto(str.size + 1) do |start| + str.rindex(str, start).should == str.rindex(chr, start) + end + + (-str.size - 1).upto(-1) do |start| + str.rindex(str, start).should == str.rindex(chr, start) + end + end + end + + it "returns the index of the last occurrence of the given substring" do + "blablabla".rindex("").should == 9 + "blablabla".rindex("a").should == 8 + "blablabla".rindex("la").should == 7 + "blablabla".rindex("bla").should == 6 + "blablabla".rindex("abla").should == 5 + "blablabla".rindex("labla").should == 4 + "blablabla".rindex("blabla").should == 3 + "blablabla".rindex("ablabla").should == 2 + "blablabla".rindex("lablabla").should == 1 + "blablabla".rindex("blablabla").should == 0 + + "blablabla".rindex("l").should == 7 + "blablabla".rindex("bl").should == 6 + "blablabla".rindex("abl").should == 5 + "blablabla".rindex("labl").should == 4 + "blablabla".rindex("blabl").should == 3 + "blablabla".rindex("ablabl").should == 2 + "blablabla".rindex("lablabl").should == 1 + "blablabla".rindex("blablabl").should == 0 + + "blablabla".rindex("b").should == 6 + "blablabla".rindex("ab").should == 5 + "blablabla".rindex("lab").should == 4 + "blablabla".rindex("blab").should == 3 + "blablabla".rindex("ablab").should == 2 + "blablabla".rindex("lablab").should == 1 + "blablabla".rindex("blablab").should == 0 + end + + it "doesn't set $~" do + $~ = nil + + 'hello.'.rindex('ll') + $~.should == nil + end + + it "ignores string subclasses" do + "blablabla".rindex(StringSpecs::MyString.new("bla")).should == 6 + StringSpecs::MyString.new("blablabla").rindex("bla").should == 6 + StringSpecs::MyString.new("blablabla").rindex(StringSpecs::MyString.new("bla")).should == 6 + end + + it "starts the search at the given offset" do + "blablabla".rindex("bl", 0).should == 0 + "blablabla".rindex("bl", 1).should == 0 + "blablabla".rindex("bl", 2).should == 0 + "blablabla".rindex("bl", 3).should == 3 + + 
"blablabla".rindex("bla", 0).should == 0 + "blablabla".rindex("bla", 1).should == 0 + "blablabla".rindex("bla", 2).should == 0 + "blablabla".rindex("bla", 3).should == 3 + + "blablabla".rindex("blab", 0).should == 0 + "blablabla".rindex("blab", 1).should == 0 + "blablabla".rindex("blab", 2).should == 0 + "blablabla".rindex("blab", 3).should == 3 + "blablabla".rindex("blab", 6).should == 3 + "blablablax".rindex("blab", 6).should == 3 + + "blablabla".rindex("la", 1).should == 1 + "blablabla".rindex("la", 2).should == 1 + "blablabla".rindex("la", 3).should == 1 + "blablabla".rindex("la", 4).should == 4 + + "blablabla".rindex("lab", 1).should == 1 + "blablabla".rindex("lab", 2).should == 1 + "blablabla".rindex("lab", 3).should == 1 + "blablabla".rindex("lab", 4).should == 4 + + "blablabla".rindex("ab", 2).should == 2 + "blablabla".rindex("ab", 3).should == 2 + "blablabla".rindex("ab", 4).should == 2 + "blablabla".rindex("ab", 5).should == 5 + + "blablabla".rindex("", 0).should == 0 + "blablabla".rindex("", 1).should == 1 + "blablabla".rindex("", 2).should == 2 + "blablabla".rindex("", 7).should == 7 + "blablabla".rindex("", 8).should == 8 + "blablabla".rindex("", 9).should == 9 + "blablabla".rindex("", 10).should == 9 + end + + it "starts the search at offset + self.length if offset is negative" do + str = "blablabla" + + ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle| + (-str.length .. 
-1).each do |offset| + str.rindex(needle, offset).should == + str.rindex(needle, offset + str.length) + end + end + end + + it "returns nil if the substring isn't found" do + "blablabla".rindex("B").should == nil + "blablabla".rindex("z").should == nil + "blablabla".rindex("BLA").should == nil + "blablabla".rindex("blablablabla").should == nil + + "hello".rindex("lo", 0).should == nil + "hello".rindex("lo", 1).should == nil + "hello".rindex("lo", 2).should == nil + + "hello".rindex("llo", 0).should == nil + "hello".rindex("llo", 1).should == nil + + "hello".rindex("el", 0).should == nil + "hello".rindex("ello", 0).should == nil + + "hello".rindex("", -6).should == nil + "hello".rindex("", -7).should == nil + + "hello".rindex("h", -6).should == nil + end + + it "tries to convert start_offset to an integer via to_int" do + obj = mock('5') + def obj.to_int() 5 end + "str".rindex("st", obj).should == 0 + + obj = mock('5') + def obj.respond_to?(arg, *) true end + def obj.method_missing(*args) 5 end + "str".rindex("st", obj).should == 0 + end + + it "raises a TypeError when given offset is nil" do + -> { "str".rindex("st", nil) }.should raise_error(TypeError) + end + + it "handles a substring in a superset encoding" do + 'abc'.dup.force_encoding(Encoding::US_ASCII).rindex('é').should == nil + end + + it "handles a substring in a subset encoding" do + 'été'.rindex('t'.dup.force_encoding(Encoding::US_ASCII)).should == 1 + end + + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + str = 'abc'.dup.force_encoding("ISO-2022-JP") + pattern = 'b'.dup.force_encoding("EUC-JP") + + -> { str.rindex(pattern) }.should raise_error(Encoding::CompatibilityError, "incompatible character encodings: ISO-2022-JP and EUC-JP") + end +end + +describe "String#rindex with Regexp" do + it "behaves the same as String#rindex(string) for escaped string regexps" do + ["blablabla", "hello cruel world...!"].each do |str| + ["", "b", "bla", "lab", "o c", "d."].each do 
|needle| + regexp = Regexp.new(Regexp.escape(needle)) + str.rindex(regexp).should == str.rindex(needle) + + 0.upto(str.size + 1) do |start| + str.rindex(regexp, start).should == str.rindex(needle, start) + end + + (-str.size - 1).upto(-1) do |start| + str.rindex(regexp, start).should == str.rindex(needle, start) + end + end + end + end + + it "returns the index of the first match from the end of string of regexp" do + "blablabla".rindex(/bla/).should == 6 + "blablabla".rindex(/BLA/i).should == 6 + + "blablabla".rindex(/.{0}/).should == 9 + "blablabla".rindex(/.{1}/).should == 8 + "blablabla".rindex(/.{2}/).should == 7 + "blablabla".rindex(/.{6}/).should == 3 + "blablabla".rindex(/.{9}/).should == 0 + + "blablabla".rindex(/.*/).should == 9 + "blablabla".rindex(/.+/).should == 8 + + "blablabla".rindex(/bla|a/).should == 8 + + not_supported_on :opal do + "blablabla".rindex(/\A/).should == 0 + "blablabla".rindex(/\Z/).should == 9 + "blablabla".rindex(/\z/).should == 9 + "blablabla\n".rindex(/\Z/).should == 10 + "blablabla\n".rindex(/\z/).should == 10 + end + + "blablabla".rindex(/^/).should == 0 + not_supported_on :opal do + "\nblablabla".rindex(/^/).should == 1 + "b\nlablabla".rindex(/^/).should == 2 + end + "blablabla".rindex(/$/).should == 9 + + "blablabla".rindex(/.l./).should == 6 + end + + it "sets $~ to MatchData of match and nil when there's none" do + 'hello.'.rindex(/.(.)/) + $~[0].should == 'o.' 
+ + 'hello.'.rindex(/not/) + $~.should == nil + end + + it "starts the search at the given offset" do + "blablabla".rindex(/.{0}/, 5).should == 5 + "blablabla".rindex(/.{1}/, 5).should == 5 + "blablabla".rindex(/.{2}/, 5).should == 5 + "blablabla".rindex(/.{3}/, 5).should == 5 + "blablabla".rindex(/.{4}/, 5).should == 5 + + "blablabla".rindex(/.{0}/, 3).should == 3 + "blablabla".rindex(/.{1}/, 3).should == 3 + "blablabla".rindex(/.{2}/, 3).should == 3 + "blablabla".rindex(/.{5}/, 3).should == 3 + "blablabla".rindex(/.{6}/, 3).should == 3 + + "blablabla".rindex(/.l./, 0).should == 0 + "blablabla".rindex(/.l./, 1).should == 0 + "blablabla".rindex(/.l./, 2).should == 0 + "blablabla".rindex(/.l./, 3).should == 3 + + "blablablax".rindex(/.x/, 10).should == 8 + "blablablax".rindex(/.x/, 9).should == 8 + "blablablax".rindex(/.x/, 8).should == 8 + + "blablablax".rindex(/..x/, 10).should == 7 + "blablablax".rindex(/..x/, 9).should == 7 + "blablablax".rindex(/..x/, 8).should == 7 + "blablablax".rindex(/..x/, 7).should == 7 + + not_supported_on :opal do + "blablabla\n".rindex(/\Z/, 9).should == 9 + end + end + + it "starts the search at offset + self.length if offset is negative" do + str = "blablabla" + + ["bl", "bla", "blab", "la", "lab", "ab", ""].each do |needle| + (-str.length .. 
-1).each do |offset| + str.rindex(needle, offset).should == + str.rindex(needle, offset + str.length) + end + end + end + + it "returns nil if the substring isn't found" do + "blablabla".rindex(/BLA/).should == nil + "blablabla".rindex(/.{10}/).should == nil + "blablablax".rindex(/.x/, 7).should == nil + "blablablax".rindex(/..x/, 6).should == nil + + not_supported_on :opal do + "blablabla".rindex(/\Z/, 5).should == nil + "blablabla".rindex(/\z/, 5).should == nil + "blablabla\n".rindex(/\z/, 9).should == nil + end + end + + not_supported_on :opal do + it "supports \\G which matches at the given start offset" do + "helloYOU.".rindex(/YOU\G/, 8).should == 5 + "helloYOU.".rindex(/YOU\G/).should == nil + + idx = "helloYOUall!".index("YOU") + re = /YOU.+\G.+/ + # The # marks where \G will match. + [ + ["helloYOU#all.", nil], + ["helloYOUa#ll.", idx], + ["helloYOUal#l.", idx], + ["helloYOUall#.", idx], + ["helloYOUall.#", nil] + ].each do |i| + start = i[0].index("#") + str = i[0].delete("#") + + str.rindex(re, start).should == i[1] + end + end + end + + it "tries to convert start_offset to an integer via to_int" do + obj = mock('5') + def obj.to_int() 5 end + "str".rindex(/../, obj).should == 1 + + obj = mock('5') + def obj.respond_to?(arg, *) true end + def obj.method_missing(*args); 5; end + "str".rindex(/../, obj).should == 1 + end + + it "raises a TypeError when given offset is nil" do + -> { "str".rindex(/../, nil) }.should raise_error(TypeError) + end + + it "returns the reverse character index of a multibyte character" do + "ありがりがとう".rindex("が").should == 4 + "ありがりがとう".rindex(/が/).should == 4 + end + + it "returns the character index before the finish" do + "ありがりがとう".rindex("が", 3).should == 2 + "ありがりがとう".rindex(/が/, 3).should == 2 + end + + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + re = Regexp.new "れ".encode(Encoding::EUC_JP) + -> do + "あれ".rindex re + end.should raise_error(Encoding::CompatibilityError, "incompatible 
encoding regexp match (EUC-JP regexp with UTF-8 string)") + end +end diff --git a/spec/ruby/core/string/rjust_spec.rb b/spec/ruby/core/string/rjust_spec.rb new file mode 100644 index 0000000000..4ad3e54aea --- /dev/null +++ b/spec/ruby/core/string/rjust_spec.rb @@ -0,0 +1,100 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#rjust with length, padding" do + it "returns a new string of specified length with self right justified and padded with padstr" do + "hello".rjust(20, '1234').should == "123412341234123hello" + + "".rjust(1, "abcd").should == "a" + "".rjust(2, "abcd").should == "ab" + "".rjust(3, "abcd").should == "abc" + "".rjust(4, "abcd").should == "abcd" + "".rjust(6, "abcd").should == "abcdab" + + "OK".rjust(3, "abcd").should == "aOK" + "OK".rjust(4, "abcd").should == "abOK" + "OK".rjust(6, "abcd").should == "abcdOK" + "OK".rjust(8, "abcd").should == "abcdabOK" + end + + it "pads with whitespace if no padstr is given" do + "hello".rjust(20).should == " hello" + end + + it "returns self if it's longer than or as long as the specified length" do + "".rjust(0).should == "" + "".rjust(-1).should == "" + "hello".rjust(4).should == "hello" + "hello".rjust(-1).should == "hello" + "this".rjust(3).should == "this" + "radiology".rjust(8, '-').should == "radiology" + end + + it "tries to convert length to an integer using to_int" do + "^".rjust(3.8, "^_").should == "^_^" + + obj = mock('3') + obj.should_receive(:to_int).and_return(3) + + "o".rjust(obj, "o_").should == "o_o" + end + + it "raises a TypeError when length can't be converted to an integer" do + -> { "hello".rjust("x") }.should raise_error(TypeError) + -> { "hello".rjust("x", "y") }.should raise_error(TypeError) + -> { "hello".rjust([]) }.should raise_error(TypeError) + -> { "hello".rjust(mock('x')) }.should raise_error(TypeError) + end + + it "tries to convert padstr to a string using to_str" do + padstr = mock('123') + 
padstr.should_receive(:to_str).and_return("123") + + "hello".rjust(10, padstr).should == "12312hello" + end + + it "raises a TypeError when padstr can't be converted" do + -> { "hello".rjust(20, []) }.should raise_error(TypeError) + -> { "hello".rjust(20, Object.new)}.should raise_error(TypeError) + -> { "hello".rjust(20, mock('x')) }.should raise_error(TypeError) + end + + it "raises an ArgumentError when padstr is empty" do + -> { "hello".rjust(10, '') }.should raise_error(ArgumentError) + end + + it "returns String instances when called on subclasses" do + StringSpecs::MyString.new("").rjust(10).should be_an_instance_of(String) + StringSpecs::MyString.new("foo").rjust(10).should be_an_instance_of(String) + StringSpecs::MyString.new("foo").rjust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) + + "".rjust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) + "foo".rjust(10, StringSpecs::MyString.new("x")).should be_an_instance_of(String) + end + + describe "with width" do + it "returns a String in the same encoding as the original" do + str = "abc".dup.force_encoding Encoding::IBM437 + result = str.rjust 5 + result.should == " abc" + result.encoding.should equal(Encoding::IBM437) + end + end + + describe "with width, pattern" do + it "returns a String in the compatible encoding" do + str = "abc".dup.force_encoding Encoding::IBM437 + result = str.rjust 5, "あ" + result.should == "ああabc" + result.encoding.should equal(Encoding::UTF_8) + end + + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + pat = "ア".encode Encoding::EUC_JP + -> do + "あれ".rjust 5, pat + end.should raise_error(Encoding::CompatibilityError) + end + end +end diff --git a/spec/ruby/core/string/rpartition_spec.rb b/spec/ruby/core/string/rpartition_spec.rb new file mode 100644 index 0000000000..cef0384c73 --- /dev/null +++ b/spec/ruby/core/string/rpartition_spec.rb @@ -0,0 +1,71 @@ +require_relative '../../spec_helper' 
+require_relative 'fixtures/classes' +require_relative 'shared/partition' + +describe "String#rpartition with String" do + it_behaves_like :string_partition, :rpartition + + it "returns an array of substrings based on splitting on the given string" do + "hello world".rpartition("o").should == ["hello w", "o", "rld"] + end + + it "always returns 3 elements" do + "hello".rpartition("x").should == ["", "", "hello"] + "hello".rpartition("hello").should == ["", "hello", ""] + end + + it "returns original string if regexp doesn't match" do + "hello".rpartition("/x/").should == ["", "", "hello"] + end + + it "returns new object if doesn't match" do + str = "hello" + str.rpartition("/no_match/").last.should_not.equal?(str) + end + + it "handles multibyte string correctly" do + "ユーザ@ドメイン".rpartition(/@/).should == ["ユーザ", "@", "ドメイン"] + end + + it "accepts regexp" do + "hello!".rpartition(/l./).should == ["hel", "lo", "!"] + end + + it "affects $~" do + matched_string = "hello!".rpartition(/l./)[1] + matched_string.should == $~[0] + end + + it "converts its argument using :to_str" do + find = mock('l') + find.should_receive(:to_str).and_return("l") + "hello".rpartition(find).should == ["hel","l","o"] + end + + it "raises an error if not convertible to string" do + ->{ "hello".rpartition(5) }.should raise_error(TypeError) + ->{ "hello".rpartition(nil) }.should raise_error(TypeError) + end + + it "handles a pattern in a superset encoding" do + string = "hello".dup.force_encoding(Encoding::US_ASCII) + + result = string.rpartition("é") + + result.should == ["", "", "hello"] + result[0].encoding.should == Encoding::US_ASCII + result[1].encoding.should == Encoding::US_ASCII + result[2].encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + pattern = "o".dup.force_encoding(Encoding::US_ASCII) + + result = "héllo world".rpartition(pattern) + + result.should == ["héllo w", "o", "rld"] + result[0].encoding.should == Encoding::UTF_8 + 
result[1].encoding.should == Encoding::US_ASCII + result[2].encoding.should == Encoding::UTF_8 + end +end diff --git a/spec/ruby/core/string/rstrip_spec.rb b/spec/ruby/core/string/rstrip_spec.rb new file mode 100644 index 0000000000..55773f5238 --- /dev/null +++ b/spec/ruby/core/string/rstrip_spec.rb @@ -0,0 +1,80 @@ +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/strip' + +describe "String#rstrip" do + it_behaves_like :string_strip, :rstrip + + it "returns a copy of self with trailing whitespace removed" do + " hello ".rstrip.should == " hello" + " hello world ".rstrip.should == " hello world" + " hello world \n\r\t\n\v\r".rstrip.should == " hello world" + "hello".rstrip.should == "hello" + "hello\x00".rstrip.should == "hello" + "こにちわ ".rstrip.should == "こにちわ" + end + + it "works with lazy substrings" do + " hello "[1...-1].rstrip.should == " hello" + " hello world "[1...-1].rstrip.should == " hello world" + " hello world \n\r\t\n\v\r"[1...-1].rstrip.should == " hello world" + " こにちわ "[1...-1].rstrip.should == "こにちわ" + end + + it "returns a copy of self with all trailing whitespace and NULL bytes removed" do + "\x00 \x00hello\x00 \x00".rstrip.should == "\x00 \x00hello" + end +end + +describe "String#rstrip!" do + it "modifies self in place and returns self" do + a = " hello " + a.rstrip!.should equal(a) + a.should == " hello" + end + + it "modifies self removing trailing NULL bytes and whitespace" do + a = "\x00 \x00hello\x00 \x00" + a.rstrip! + a.should == "\x00 \x00hello" + end + + it "returns nil if no modifications were made" do + a = "hello" + a.rstrip!.should == nil + a.should == "hello" + end + + it "makes a string empty if it is only whitespace" do + "".rstrip!.should == nil + " ".rstrip.should == "" + " ".rstrip.should == "" + end + + it "removes trailing NULL bytes and whitespace" do + a = "\000 goodbye \000" + a.rstrip! 
+ a.should == "\000 goodbye" + end + + it "raises a FrozenError on a frozen instance that is modified" do + -> { " hello ".freeze.rstrip! }.should raise_error(FrozenError) + end + + # see [ruby-core:23666] + it "raises a FrozenError on a frozen instance that would not be modified" do + -> { "hello".freeze.rstrip! }.should raise_error(FrozenError) + -> { "".freeze.rstrip! }.should raise_error(FrozenError) + end + + it "raises an Encoding::CompatibilityError if the last non-space codepoint is invalid" do + s = "abc\xDF".force_encoding(Encoding::UTF_8) + s.valid_encoding?.should be_false + -> { s.rstrip! }.should raise_error(Encoding::CompatibilityError) + + s = "abc\xDF ".force_encoding(Encoding::UTF_8) + s.valid_encoding?.should be_false + -> { s.rstrip! }.should raise_error(Encoding::CompatibilityError) + end +end diff --git a/spec/ruby/core/string/scan_spec.rb b/spec/ruby/core/string/scan_spec.rb new file mode 100644 index 0000000000..bbe843b591 --- /dev/null +++ b/spec/ruby/core/string/scan_spec.rb @@ -0,0 +1,173 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#scan" do + it "returns an array containing all matches" do + "cruel world".scan(/\w+/).should == ["cruel", "world"] + "cruel world".scan(/.../).should == ["cru", "el ", "wor"] + + # Edge case + "hello".scan(//).should == ["", "", "", "", "", ""] + "".scan(//).should == [""] + end + + it "respects unicode when the pattern collapses to nothing" do + str = "こにちわ" + reg = %r!! 
+ str.scan(reg).should == ["", "", "", "", ""] + end + + it "stores groups as arrays in the returned arrays" do + "hello".scan(/()/).should == [[""]] * 6 + "hello".scan(/()()/).should == [["", ""]] * 6 + "cruel world".scan(/(...)/).should == [["cru"], ["el "], ["wor"]] + "cruel world".scan(/(..)(..)/).should == [["cr", "ue"], ["l ", "wo"]] + end + + it "scans for occurrences of the string if pattern is a string" do + "one two one two".scan('one').should == ["one", "one"] + "hello.".scan('.').should == ['.'] + end + + it "sets $~ to MatchData of last match and nil when there's none" do + 'hello.'.scan(/.(.)/) + $~[0].should == 'o.' + + 'hello.'.scan(/not/) + $~.should == nil + + 'hello.'.scan('l') + $~.begin(0).should == 3 + $~[0].should == 'l' + + 'hello.'.scan('not') + $~.should == nil + end + + it "supports \\G which matches the end of the previous match / string start for first match" do + "one two one two".scan(/\G\w+/).should == ["one"] + "one two one two".scan(/\G\w+\s*/).should == ["one ", "two ", "one ", "two"] + "one two one two".scan(/\G\s*\w+/).should == ["one", " two", " one", " two"] + end + + it "tries to convert pattern to a string via to_str" do + obj = mock('o') + obj.should_receive(:to_str).and_return("o") + "o_o".scan(obj).should == ["o", "o"] + end + + it "raises a TypeError if pattern isn't a Regexp and can't be converted to a String" do + -> { "cruel world".scan(5) }.should raise_error(TypeError) + not_supported_on :opal do + -> { "cruel world".scan(:test) }.should raise_error(TypeError) + end + -> { "cruel world".scan(mock('x')) }.should raise_error(TypeError) + end + + # jruby/jruby#5513 + it "does not raise any errors when passed a multi-byte string" do + "あああaaaあああ".scan("あああ").should == ["あああ", "あああ"] + end + + it "returns Strings in the same encoding as self" do + "cruel world".encode("US-ASCII").scan(/\w+/).each do |s| + s.encoding.should == Encoding::US_ASCII + end + end +end + +describe "String#scan with pattern and block" do + it 
"returns self" do + s = "foo" + s.scan(/./) {}.should equal(s) + s.scan(/roar/) {}.should equal(s) + end + + it "passes each match to the block as one argument: an array" do + a = [] + "cruel world".scan(/\w+/) { |*w| a << w } + a.should == [["cruel"], ["world"]] + end + + it "passes groups to the block as one argument: an array" do + a = [] + "cruel world".scan(/(..)(..)/) { |w| a << w } + a.should == [["cr", "ue"], ["l ", "wo"]] + end + + it "sets $~ for access from the block" do + str = "hello" + + matches = [] + offsets = [] + + str.scan(/([aeiou])/) do + md = $~ + md.string.should == str + matches << md.to_a + offsets << md.offset(0) + str + end + + matches.should == [["e", "e"], ["o", "o"]] + offsets.should == [[1, 2], [4, 5]] + + matches = [] + offsets = [] + + str.scan("l") do + md = $~ + md.string.should == str + matches << md.to_a + offsets << md.offset(0) + str + end + + matches.should == [["l"], ["l"]] + offsets.should == [[2, 3], [3, 4]] + end + + it "restores $~ after leaving the block" do + [/./, "l"].each do |pattern| + old_md = nil + "hello".scan(pattern) do + old_md = $~ + "ok".match(/./) + "x" + end + + $~[0].should == old_md[0] + $~.string.should == "hello" + end + end + + it "sets $~ to MatchData of last match and nil when there's none for access from outside" do + 'hello.'.scan('l') { 'x' } + $~.begin(0).should == 3 + $~[0].should == 'l' + + 'hello.'.scan('not') { 'x' } + $~.should == nil + + 'hello.'.scan(/.(.)/) { 'x' } + $~[0].should == 'o.' 
+ + 'hello.'.scan(/not/) { 'x' } + $~.should == nil + end + + it "passes block arguments as individual arguments when blocks are provided" do + "a b c\na b c\na b c".scan(/(\w*) (\w*) (\w*)/) do |first,second,third| + first.should == 'a'; + second.should == 'b'; + third.should == 'c'; + end + end + + it "yields String instances for subclasses" do + a = [] + StringSpecs::MyString.new("abc").scan(/./) { |s| a << s.class } + a.should == [String, String, String] + end +end diff --git a/spec/ruby/core/string/scrub_spec.rb b/spec/ruby/core/string/scrub_spec.rb new file mode 100644 index 0000000000..b9ef0f1a16 --- /dev/null +++ b/spec/ruby/core/string/scrub_spec.rb @@ -0,0 +1,164 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#scrub with a default replacement" do + it "returns self for valid strings" do + input = "foo" + + input.scrub.should == input + end + + it "replaces invalid byte sequences" do + x81 = [0x81].pack('C').force_encoding('utf-8') + "abc\u3042#{x81}".scrub.should == "abc\u3042\uFFFD" + end + + it "replaces invalid byte sequences in lazy substrings" do + x81 = [0x81].pack('C').force_encoding('utf-8') + "abc\u3042#{x81}def"[1...-1].scrub.should == "bc\u3042\uFFFDde" + end + + it "returns a copy of self when the input encoding is BINARY" do + input = "foo".encode('BINARY') + + input.scrub.should == "foo" + end + + it "replaces invalid byte sequences when using ASCII as the input encoding" do + xE3x80 = [0xE3, 0x80].pack('CC').force_encoding 'utf-8' + input = "abc\u3042#{xE3x80}".force_encoding('ASCII') + input.scrub.should == "abc?????" 
+ end + + it "returns a String in the same encoding as self" do + x81 = [0x81].pack('C').force_encoding('utf-8') + "abc\u3042#{x81}".scrub.encoding.should == Encoding::UTF_8 + end + + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("foo").scrub.should be_an_instance_of(String) + input = [0x81].pack('C').force_encoding('utf-8') + StringSpecs::MyString.new(input).scrub.should be_an_instance_of(String) + end +end + +describe "String#scrub with a custom replacement" do + it "returns self for valid strings" do + input = "foo" + + input.scrub("*").should == input + end + + it "replaces invalid byte sequences" do + x81 = [0x81].pack('C').force_encoding('utf-8') + "abc\u3042#{x81}".scrub("*").should == "abc\u3042*" + end + + it "replaces invalid byte sequences in frozen strings" do + x81 = [0x81].pack('C').force_encoding('utf-8') + (-"abc\u3042#{x81}").scrub("*").should == "abc\u3042*" + + leading_surrogate = [0x00, 0xD8] + utf16_str = ("abc".encode('UTF-16LE').bytes + leading_surrogate).pack('c*').force_encoding('UTF-16LE') + (-(utf16_str)).scrub("*".encode('UTF-16LE')).should == "abc*".encode('UTF-16LE') + end + + it "replaces an incomplete character at the end with a single replacement" do + xE3x80 = [0xE3, 0x80].pack('CC').force_encoding 'utf-8' + xE3x80.scrub("*").should == "*" + end + + it "raises ArgumentError for replacements with an invalid encoding" do + x81 = [0x81].pack('C').force_encoding('utf-8') + xE4 = [0xE4].pack('C').force_encoding('utf-8') + block = -> { "foo#{x81}".scrub(xE4) } + + block.should raise_error(ArgumentError) + end + + it "returns a String in the same encoding as self" do + x81 = [0x81].pack('C').force_encoding('utf-8') + "abc\u3042#{x81}".scrub("*").encoding.should == Encoding::UTF_8 + end + + it "raises TypeError when a non String replacement is given" do + x81 = [0x81].pack('C').force_encoding('utf-8') + block = -> { "foo#{x81}".scrub(1) } + + block.should raise_error(TypeError) + end + + it "returns 
String instances when called on a subclass" do + StringSpecs::MyString.new("foo").scrub("*").should be_an_instance_of(String) + input = [0x81].pack('C').force_encoding('utf-8') + StringSpecs::MyString.new(input).scrub("*").should be_an_instance_of(String) + end +end + +describe "String#scrub with a block" do + it "returns self for valid strings" do + input = "foo" + + input.scrub { |b| "*" }.should == input + end + + it "replaces invalid byte sequences" do + xE3x80 = [0xE3, 0x80].pack('CC').force_encoding 'utf-8' + replaced = "abc\u3042#{xE3x80}".scrub { |b| "<#{b.unpack("H*")[0]}>" } + + replaced.should == "abc\u3042<e380>" + end + + it "replaces invalid byte sequences using a custom encoding" do + x80x80 = [0x80, 0x80].pack('CC').force_encoding 'utf-8' + replaced = x80x80.scrub do |bad| + bad.encode(Encoding::UTF_8, Encoding::Windows_1252) + end + + replaced.should == "€€" + end + + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("foo").scrub { |b| "*" }.should be_an_instance_of(String) + input = [0x81].pack('C').force_encoding('utf-8') + StringSpecs::MyString.new(input).scrub { |b| "<#{b.unpack("H*")[0]}>" }.should be_an_instance_of(String) + end +end + +describe "String#scrub!" do + it "modifies self for valid strings" do + x81 = [0x81].pack('C').force_encoding('utf-8') + input = "a#{x81}" + input.scrub! + input.should == "a\uFFFD" + end + + it "accepts blocks" do + x81 = [0x81].pack('C').force_encoding('utf-8') + input = "a#{x81}" + input.scrub! { |b| "<?>" } + input.should == "a<?>" + end + + it "maintains the state of frozen strings that are already valid" do + input = "a" + input.freeze + input.scrub! + input.frozen?.should be_true + end + + it "preserves the instance variables of already valid strings" do + input = "a" + input.instance_variable_set(:@a, 'b') + input.scrub! 
+ input.instance_variable_get(:@a).should == 'b' + end + + it "accepts a frozen string as a replacement" do + input = "a\xE2" + input.scrub!('.'.freeze) + input.should == 'a.' + end +end diff --git a/spec/ruby/core/string/setbyte_spec.rb b/spec/ruby/core/string/setbyte_spec.rb new file mode 100644 index 0000000000..85403ca62c --- /dev/null +++ b/spec/ruby/core/string/setbyte_spec.rb @@ -0,0 +1,112 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' + +describe "String#setbyte" do + it "returns an Integer" do + "a".setbyte(0,1).should be_kind_of(Integer) + end + + it "modifies the receiver" do + str = "glark" + old_id = str.object_id + str.setbyte(0,88) + str.object_id.should == old_id + end + + it "changes the byte at the given index to the new byte" do + str = "a" + str.setbyte(0,98) + str.should == 'b' + + # copy-on-write case + str1, str2 = "fooXbar".split("X") + str2.setbyte(0, 50) + str2.should == "2ar" + str1.should == "foo" + end + + it "allows changing bytes in multi-byte characters" do + str = "\u{915}" + str.setbyte(1,254) + str.getbyte(1).should == 254 + end + + it "can invalidate a String's encoding" do + str = "glark" + str.valid_encoding?.should be_true + str.setbyte(2,253) + str.valid_encoding?.should be_false + + str = "ABC" + str.setbyte(0, 0x20) # ' ' + str.should.valid_encoding? + str.setbyte(0, 0xE3) + str.should_not.valid_encoding? 
+ end + + it "regards a negative index as counting from the end of the String" do + str = "hedgehog" + str.setbyte(-3, 108) + str.should == "hedgelog" + + # copy-on-write case + str1, str2 = "fooXbar".split("X") + str2.setbyte(-1, 50) + str2.should == "ba2" + str1.should == "foo" + end + + it "raises an IndexError if the index is greater than the String bytesize" do + -> { "?".setbyte(1, 97) }.should raise_error(IndexError) + end + + it "raises an IndexError if the negative index is greater magnitude than the String bytesize" do + -> { "???".setbyte(-5, 97) }.should raise_error(IndexError) + end + + it "sets a byte at an index greater than String size" do + chr = "\u{998}" + chr.bytesize.should == 3 + chr.setbyte(2, 150) + chr.should == "\xe0\xa6\x96" + end + + it "does not modify the original string when using String.new" do + str1 = "hedgehog" + str2 = String.new(str1) + str2.setbyte(0, 108) + str2.should == "ledgehog" + str2.should_not == "hedgehog" + str1.should == "hedgehog" + str1.should_not == "ledgehog" + end + + it "raises a FrozenError if self is frozen" do + str = "cold".freeze + str.frozen?.should be_true + -> { str.setbyte(3,96) }.should raise_error(FrozenError) + end + + it "raises a TypeError unless the second argument is an Integer" do + -> { "a".setbyte(0,'a') }.should raise_error(TypeError) + end + + it "calls #to_int to convert the index" do + index = mock("setbyte index") + index.should_receive(:to_int).and_return(1) + + str = "hat" + str.setbyte(index, "i".ord) + str.should == "hit" + end + + it "calls to_int to convert the value" do + value = mock("setbyte value") + value.should_receive(:to_int).and_return("i".ord) + + str = "hat" + str.setbyte(1, value) + str.should == "hit" + end +end diff --git a/spec/ruby/core/string/shared/byte_index_common.rb b/spec/ruby/core/string/shared/byte_index_common.rb new file mode 100644 index 0000000000..3de1453f4f --- /dev/null +++ b/spec/ruby/core/string/shared/byte_index_common.rb @@ -0,0 +1,63 @@ +# -*- 
encoding: utf-8 -*- +require_relative '../../../spec_helper' + +describe :byte_index_common, shared: true do + describe "raises on type errors" do + it "raises a TypeError if passed nil" do + -> { "abc".send(@method, nil) }.should raise_error(TypeError, "no implicit conversion of nil into String") + end + + it "raises a TypeError if passed a boolean" do + -> { "abc".send(@method, true) }.should raise_error(TypeError, "no implicit conversion of true into String") + end + + it "raises a TypeError if passed a Symbol" do + not_supported_on :opal do + -> { "abc".send(@method, :a) }.should raise_error(TypeError, "no implicit conversion of Symbol into String") + end + end + + it "raises a TypeError if passed a Symbol" do + obj = mock('x') + obj.should_not_receive(:to_int) + -> { "hello".send(@method, obj) }.should raise_error(TypeError, "no implicit conversion of MockObject into String") + end + + it "raises a TypeError if passed an Integer" do + -> { "abc".send(@method, 97) }.should raise_error(TypeError, "no implicit conversion of Integer into String") + end + end + + describe "with multibyte codepoints" do + it "raises an IndexError when byte offset lands in the middle of a multibyte character" do + -> { "わ".send(@method, "", 1) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + -> { "わ".send(@method, "", 2) }.should raise_error(IndexError, "offset 2 does not land on character boundary") + -> { "わ".send(@method, "", -1) }.should raise_error(IndexError, "offset 2 does not land on character boundary") + -> { "わ".send(@method, "", -2) }.should raise_error(IndexError, "offset 1 does not land on character boundary") + end + + it "raises an Encoding::CompatibilityError if the encodings are incompatible" do + re = Regexp.new "れ".encode(Encoding::EUC_JP) + -> do + "あれ".send(@method, re) + end.should raise_error(Encoding::CompatibilityError, "incompatible encoding regexp match (EUC-JP regexp with UTF-8 string)") + end + end + + describe "with 
global variables" do + it "doesn't set $~ for non regex search" do + $~ = nil + + 'hello.'.send(@method, 'll') + $~.should == nil + end + + it "sets $~ to MatchData of match and nil when there's none" do + 'hello.'.send(@method, /.e./) + $~[0].should == 'hel' + + 'hello.'.send(@method, /not/) + $~.should == nil + end + end +end diff --git a/spec/ruby/core/string/shared/chars.rb b/spec/ruby/core/string/shared/chars.rb new file mode 100644 index 0000000000..c730643cf4 --- /dev/null +++ b/spec/ruby/core/string/shared/chars.rb @@ -0,0 +1,66 @@ +# -*- encoding: utf-8 -*- +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' + +describe :string_chars, shared: true do + it "passes each char in self to the given block" do + a = [] + "hello".send(@method) { |c| a << c } + a.should == ['h', 'e', 'l', 'l', 'o'] + end + + it "returns self" do + s = StringSpecs::MyString.new "hello" + s.send(@method){}.should equal(s) + end + + + it "is unicode aware" do + "\303\207\342\210\202\303\251\306\222g".send(@method).to_a.should == + ["\303\207", "\342\210\202", "\303\251", "\306\222", "g"] + end + + it "returns characters in the same encoding as self" do + "&%".dup.force_encoding('Shift_JIS').send(@method).to_a.all? {|c| c.encoding.name.should == 'Shift_JIS'} + "&%".encode('BINARY').send(@method).to_a.all? 
{|c| c.encoding.should == Encoding::BINARY } + end + + it "works with multibyte characters" do + s = "\u{8987}".dup.force_encoding("UTF-8") + s.bytesize.should == 3 + s.send(@method).to_a.should == [s] + end + + it "works if the String's contents is invalid for its encoding" do + xA4 = [0xA4].pack('C') + xA4.force_encoding('UTF-8') + xA4.valid_encoding?.should be_false + xA4.send(@method).to_a.should == [xA4.force_encoding("UTF-8")] + end + + it "returns a different character if the String is transcoded" do + s = "\u{20AC}".dup.force_encoding('UTF-8') + s.encode('UTF-8').send(@method).to_a.should == ["\u{20AC}".dup.force_encoding('UTF-8')] + s.encode('iso-8859-15').send(@method).to_a.should == [[0xA4].pack('C').force_encoding('iso-8859-15')] + s.encode('iso-8859-15').encode('UTF-8').send(@method).to_a.should == ["\u{20AC}".dup.force_encoding('UTF-8')] + end + + it "uses the String's encoding to determine what characters it contains" do + s = +"\u{24B62}" + + s.force_encoding('UTF-8').send(@method).to_a.should == [ + s.force_encoding('UTF-8') + ] + s.force_encoding('BINARY').send(@method).to_a.should == [ + [0xF0].pack('C').force_encoding('BINARY'), + [0xA4].pack('C').force_encoding('BINARY'), + [0xAD].pack('C').force_encoding('BINARY'), + [0xA2].pack('C').force_encoding('BINARY') + ] + s.force_encoding('SJIS').send(@method).to_a.should == [ + [0xF0,0xA4].pack('CC').force_encoding('SJIS'), + [0xAD].pack('C').force_encoding('SJIS'), + [0xA2].pack('C').force_encoding('SJIS') + ] + end +end diff --git a/spec/ruby/core/string/shared/codepoints.rb b/spec/ruby/core/string/shared/codepoints.rb new file mode 100644 index 0000000000..1c28ba3d5e --- /dev/null +++ b/spec/ruby/core/string/shared/codepoints.rb @@ -0,0 +1,62 @@ +# encoding: binary +describe :string_codepoints, shared: true do + it "returns self" do + s = "foo" + result = s.send(@method) {} + result.should equal s + end + + it "raises an ArgumentError when self has an invalid encoding and a method is called on the 
returned Enumerator" do + s = "\xDF".dup.force_encoding(Encoding::UTF_8) + s.valid_encoding?.should be_false + -> { s.send(@method).to_a }.should raise_error(ArgumentError) + end + + it "yields each codepoint to the block if one is given" do + codepoints = [] + "abcd".send(@method) do |codepoint| + codepoints << codepoint + end + codepoints.should == [97, 98, 99, 100] + end + + it "raises an ArgumentError if self's encoding is invalid and a block is given" do + s = "\xDF".dup.force_encoding(Encoding::UTF_8) + s.valid_encoding?.should be_false + -> { s.send(@method) { } }.should raise_error(ArgumentError) + end + + it "yields codepoints as Integers" do + "glark\u{20}".send(@method).to_a.each do |codepoint| + codepoint.should be_an_instance_of(Integer) + end + end + + it "yields one codepoint for each character" do + s = "\u{9876}\u{28}\u{1987}" + s.send(@method).to_a.size.should == s.chars.to_a.size + end + + it "works for multibyte characters" do + s = "\u{9819}" + s.bytesize.should == 3 + s.send(@method).to_a.should == [38937] + end + + it "yields the codepoints corresponding to the character's position in the String's encoding" do + "\u{787}".send(@method).to_a.should == [1927] + end + + it "round-trips to the original String using Integer#chr" do + s = "\u{13}\u{7711}\u{1010}" + s2 = +"" + s.send(@method) {|n| s2 << n.chr(Encoding::UTF_8)} + s.should == s2 + end + + it "is synonymous with #bytes for Strings which are single-byte optimizable" do + s = "(){}".encode('ascii') + s.ascii_only?.should be_true + s.send(@method).to_a.should == s.bytes.to_a + end +end diff --git a/spec/ruby/core/string/shared/concat.rb b/spec/ruby/core/string/shared/concat.rb new file mode 100644 index 0000000000..dded9a69e7 --- /dev/null +++ b/spec/ruby/core/string/shared/concat.rb @@ -0,0 +1,159 @@ +# frozen_string_literal: false +describe :string_concat, shared: true do + it "concatenates the given argument to self and returns self" do + str = 'hello ' + str.send(@method, 
'world').should equal(str) + str.should == "hello world" + end + + it "raises a FrozenError when self is frozen" do + a = "hello" + a.freeze + + -> { a.send(@method, "") }.should raise_error(FrozenError) + -> { a.send(@method, "test") }.should raise_error(FrozenError) + end + + it "returns a String when given a subclass instance" do + a = "hello" + a.send(@method, StringSpecs::MyString.new(" world")) + a.should == "hello world" + a.should be_an_instance_of(String) + end + + it "returns an instance of same class when called on a subclass" do + str = StringSpecs::MyString.new("hello") + str.send(@method, " world") + str.should == "hello world" + str.should be_an_instance_of(StringSpecs::MyString) + end + + describe "with Integer" do + it "concatenates the argument interpreted as a codepoint" do + b = "".send(@method, 33) + b.should == "!" + + b.encode!(Encoding::UTF_8) + b.send(@method, 0x203D) + b.should == "!\u203D" + end + + # #5855 + it "returns a BINARY string if self is US-ASCII and the argument is between 128-255 (inclusive)" do + a = ("".encode(Encoding::US_ASCII).send(@method, 128)) + a.encoding.should == Encoding::BINARY + a.should == 128.chr + + a = ("".encode(Encoding::US_ASCII).send(@method, 255)) + a.encoding.should == Encoding::BINARY + a.should == 255.chr + end + + it "raises RangeError if the argument is an invalid codepoint for self's encoding" do + -> { "".encode(Encoding::US_ASCII).send(@method, 256) }.should raise_error(RangeError) + -> { "".encode(Encoding::EUC_JP).send(@method, 0x81) }.should raise_error(RangeError) + end + + it "raises RangeError if the argument is negative" do + -> { "".send(@method, -200) }.should raise_error(RangeError) + -> { "".send(@method, -bignum_value) }.should raise_error(RangeError) + end + + it "doesn't call to_int on its argument" do + x = mock('x') + x.should_not_receive(:to_int) + + -> { "".send(@method, x) }.should raise_error(TypeError) + end + + it "raises a FrozenError when self is frozen" do + a = "hello" + 
a.freeze + + -> { a.send(@method, 0) }.should raise_error(FrozenError) + -> { a.send(@method, 33) }.should raise_error(FrozenError) + end + end +end + +describe :string_concat_encoding, shared: true do + describe "when self is in an ASCII-incompatible encoding incompatible with the argument's encoding" do + it "uses self's encoding if both are empty" do + "".encode("UTF-16LE").send(@method, "").encoding.should == Encoding::UTF_16LE + end + + it "uses self's encoding if the argument is empty" do + "x".encode("UTF-16LE").send(@method, "").encoding.should == Encoding::UTF_16LE + end + + it "uses the argument's encoding if self is empty" do + "".encode("UTF-16LE").send(@method, "x".encode("UTF-8")).encoding.should == Encoding::UTF_8 + end + + it "raises Encoding::CompatibilityError if neither are empty" do + -> { "x".encode("UTF-16LE").send(@method, "y".encode("UTF-8")) }.should raise_error(Encoding::CompatibilityError) + end + end + + describe "when the argument is in an ASCII-incompatible encoding incompatible with self's encoding" do + it "uses self's encoding if both are empty" do + "".encode("UTF-8").send(@method, "".encode("UTF-16LE")).encoding.should == Encoding::UTF_8 + end + + it "uses self's encoding if the argument is empty" do + "x".encode("UTF-8").send(@method, "".encode("UTF-16LE")).encoding.should == Encoding::UTF_8 + end + + it "uses the argument's encoding if self is empty" do + "".encode("UTF-8").send(@method, "x".encode("UTF-16LE")).encoding.should == Encoding::UTF_16LE + end + + it "raises Encoding::CompatibilityError if neither are empty" do + -> { "x".encode("UTF-8").send(@method, "y".encode("UTF-16LE")) }.should raise_error(Encoding::CompatibilityError) + end + end + + describe "when self and the argument are in different ASCII-compatible encodings" do + it "uses self's encoding if both are ASCII-only" do + "abc".encode("UTF-8").send(@method, "123".encode("SHIFT_JIS")).encoding.should == Encoding::UTF_8 + end + + it "uses self's encoding if the 
argument is ASCII-only" do + "\u00E9".encode("UTF-8").send(@method, "123".encode("ISO-8859-1")).encoding.should == Encoding::UTF_8 + end + + it "uses the argument's encoding if self is ASCII-only" do + "abc".encode("UTF-8").send(@method, "\u00E9".encode("ISO-8859-1")).encoding.should == Encoding::ISO_8859_1 + end + + it "raises Encoding::CompatibilityError if neither are ASCII-only" do + -> { "\u00E9".encode("UTF-8").send(@method, "\u00E9".encode("ISO-8859-1")) }.should raise_error(Encoding::CompatibilityError) + end + end + + describe "when self is BINARY and argument is US-ASCII" do + it "uses BINARY encoding" do + "abc".encode("BINARY").send(@method, "123".encode("US-ASCII")).encoding.should == Encoding::BINARY + end + end +end + +describe :string_concat_type_coercion, shared: true do + it "converts the given argument to a String using to_str" do + obj = mock('world!') + obj.should_receive(:to_str).and_return("world!") + a = 'hello '.send(@method, obj) + a.should == 'hello world!' + end + + it "raises a TypeError if the given argument can't be converted to a String" do + -> { 'hello '.send(@method, []) }.should raise_error(TypeError) + -> { 'hello '.send(@method, mock('x')) }.should raise_error(TypeError) + end + + it "raises a NoMethodError if the given argument raises a NoMethodError during type coercion to a String" do + obj = mock('world!') + obj.should_receive(:to_str).and_raise(NoMethodError) + -> { 'hello '.send(@method, obj) }.should raise_error(NoMethodError) + end +end diff --git a/spec/ruby/core/string/shared/dedup.rb b/spec/ruby/core/string/shared/dedup.rb new file mode 100644 index 0000000000..1ffd6aa0fd --- /dev/null +++ b/spec/ruby/core/string/shared/dedup.rb @@ -0,0 +1,51 @@ +# frozen_string_literal: false +describe :string_dedup, shared: true do + it 'returns self if the String is frozen' do + input = 'foo'.freeze + output = input.send(@method) + + output.should equal(input) + output.should.frozen? 
+ end + + it 'returns a frozen copy if the String is not frozen' do + input = 'foo' + output = input.send(@method) + + output.should.frozen? + output.should_not equal(input) + output.should == 'foo' + end + + it "returns the same object for equal unfrozen strings" do + origin = "this is a string" + dynamic = %w(this is a string).join(' ') + + origin.should_not equal(dynamic) + origin.send(@method).should equal(dynamic.send(@method)) + end + + it "returns the same object when it's called on the same String literal" do + "unfrozen string".send(@method).should equal("unfrozen string".send(@method)) + "unfrozen string".send(@method).should_not equal("another unfrozen string".send(@method)) + end + + it "deduplicates frozen strings" do + dynamic = %w(this string is frozen).join(' ').freeze + + dynamic.should_not equal("this string is frozen".freeze) + + dynamic.send(@method).should equal("this string is frozen".freeze) + dynamic.send(@method).should equal("this string is frozen".send(@method).freeze) + end + + it "does not deduplicate a frozen string when it has instance variables" do + dynamic = %w(this string is frozen).join(' ') + dynamic.instance_variable_set(:@a, 1) + dynamic.freeze + + dynamic.send(@method).should_not equal("this string is frozen".freeze) + dynamic.send(@method).should_not equal("this string is frozen".send(@method).freeze) + dynamic.send(@method).should equal(dynamic) + end +end diff --git a/spec/ruby/core/string/shared/each_char_without_block.rb b/spec/ruby/core/string/shared/each_char_without_block.rb new file mode 100644 index 0000000000..397100ce0e --- /dev/null +++ b/spec/ruby/core/string/shared/each_char_without_block.rb @@ -0,0 +1,26 @@ +# -*- encoding: utf-8 -*- +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' + +describe :string_each_char_without_block, shared: true do + describe "when no block is given" do + it "returns an enumerator" do + enum = "hello".send(@method) + enum.should 
be_an_instance_of(Enumerator) + enum.to_a.should == ['h', 'e', 'l', 'l', 'o'] + end + + describe "returned enumerator" do + describe "size" do + it "should return the size of the string" do + str = "hello" + str.send(@method).size.should == str.size + str = "ola" + str.send(@method).size.should == str.size + str = "\303\207\342\210\202\303\251\306\222g" + str.send(@method).size.should == str.size + end + end + end + end +end diff --git a/spec/ruby/core/string/shared/each_codepoint_without_block.rb b/spec/ruby/core/string/shared/each_codepoint_without_block.rb new file mode 100644 index 0000000000..c88e5c54c7 --- /dev/null +++ b/spec/ruby/core/string/shared/each_codepoint_without_block.rb @@ -0,0 +1,33 @@ +# encoding: binary +describe :string_each_codepoint_without_block, shared: true do + describe "when no block is given" do + it "returns an Enumerator" do + "".send(@method).should be_an_instance_of(Enumerator) + end + + it "returns an Enumerator even when self has an invalid encoding" do + s = "\xDF".dup.force_encoding(Encoding::UTF_8) + s.valid_encoding?.should be_false + s.send(@method).should be_an_instance_of(Enumerator) + end + + describe "returned Enumerator" do + describe "size" do + it "should return the size of the string" do + str = "hello" + str.send(@method).size.should == str.size + str = "ola" + str.send(@method).size.should == str.size + str = "\303\207\342\210\202\303\251\306\222g" + str.send(@method).size.should == str.size + end + + it "should return the size of the string even when the string has an invalid encoding" do + s = "\xDF".dup.force_encoding(Encoding::UTF_8) + s.valid_encoding?.should be_false + s.send(@method).size.should == 1 + end + end + end + end +end diff --git a/spec/ruby/core/string/shared/each_line.rb b/spec/ruby/core/string/shared/each_line.rb new file mode 100644 index 0000000000..231a6d9d4f --- /dev/null +++ b/spec/ruby/core/string/shared/each_line.rb @@ -0,0 +1,162 @@ +describe :string_each_line, shared: true do + it 
"splits using default newline separator when none is specified" do + a = [] + "one\ntwo\r\nthree".send(@method) { |s| a << s } + a.should == ["one\n", "two\r\n", "three"] + + b = [] + "hello\n\n\nworld".send(@method) { |s| b << s } + b.should == ["hello\n", "\n", "\n", "world"] + + c = [] + "\n\n\n\n\n".send(@method) {|s| c << s} + c.should == ["\n", "\n", "\n", "\n", "\n"] + end + + it "splits self using the supplied record separator and passes each substring to the block" do + a = [] + "one\ntwo\r\nthree".send(@method, "\n") { |s| a << s } + a.should == ["one\n", "two\r\n", "three"] + + b = [] + "hello\nworld".send(@method, 'l') { |s| b << s } + b.should == [ "hel", "l", "o\nworl", "d" ] + + c = [] + "hello\n\n\nworld".send(@method, "\n") { |s| c << s } + c.should == ["hello\n", "\n", "\n", "world"] + end + + it "splits strings containing multibyte characters" do + s = <<~EOS + foo + 🤡🤡🤡🤡🤡🤡🤡 + bar + baz + EOS + + b = [] + s.send(@method) { |part| b << part } + b.should == ["foo\n", "🤡🤡🤡🤡🤡🤡🤡\n", "bar\n", "baz\n"] + end + + it "passes self as a whole to the block if the separator is nil" do + a = [] + "one\ntwo\r\nthree".send(@method, nil) { |s| a << s } + a.should == ["one\ntwo\r\nthree"] + end + + it "yields paragraphs (broken by 2 or more successive newlines) when passed '' and replaces multiple newlines with only two ones" do + a = [] + "hello\nworld\n\n\nand\nuniverse\n\n\n\n\n".send(@method, '') { |s| a << s } + a.should == ["hello\nworld\n\n", "and\nuniverse\n\n"] + + a = [] + "hello\nworld\n\n\nand\nuniverse\n\n\n\n\ndog".send(@method, '') { |s| a << s } + a.should == ["hello\nworld\n\n", "and\nuniverse\n\n", "dog"] + end + + describe "uses $/" do + before :each do + @before_separator = $/ + end + + after :each do + suppress_warning {$/ = @before_separator} + end + + it "as the separator when none is given" do + [ + "", "x", "x\ny", "x\ry", "x\r\ny", "x\n\r\r\ny", + "hello hullo bello" + ].each do |str| + ["", "llo", "\n", "\r", nil].each do |sep| + 
expected = [] + str.send(@method, sep) { |x| expected << x } + + suppress_warning {$/ = sep} + + actual = [] + suppress_warning {str.send(@method) { |x| actual << x }} + + actual.should == expected + end + end + end + end + + it "yields String instances for subclasses" do + a = [] + StringSpecs::MyString.new("hello\nworld").send(@method) { |s| a << s.class } + a.should == [String, String] + end + + it "returns self" do + s = "hello\nworld" + (s.send(@method) {}).should equal(s) + end + + it "tries to convert the separator to a string using to_str" do + separator = mock('l') + separator.should_receive(:to_str).and_return("l") + + a = [] + "hello\nworld".send(@method, separator) { |s| a << s } + a.should == [ "hel", "l", "o\nworl", "d" ] + end + + it "does not care if the string is modified while substituting" do + str = +"hello\nworld." + out = [] + str.send(@method){|x| out << x; str[-1] = '!' }.should == "hello\nworld!" + out.should == ["hello\n", "world."] + end + + it "returns Strings in the same encoding as self" do + "one\ntwo\r\nthree".encode("US-ASCII").send(@method) do |s| + s.encoding.should == Encoding::US_ASCII + end + end + + it "raises a TypeError when the separator can't be converted to a string" do + -> { "hello world".send(@method, false) {} }.should raise_error(TypeError) + -> { "hello world".send(@method, mock('x')) {} }.should raise_error(TypeError) + end + + it "accepts a string separator" do + "hello world".send(@method, ?o).to_a.should == ["hello", " wo", "rld"] + end + + it "raises a TypeError when the separator is a symbol" do + -> { "hello world".send(@method, :o).to_a }.should raise_error(TypeError) + end + + context "when `chomp` keyword argument is passed" do + it "removes new line characters when separator is not specified" do + a = [] + "hello \nworld\n".send(@method, chomp: true) { |s| a << s } + a.should == ["hello ", "world"] + + a = [] + "hello \r\nworld\r\n".send(@method, chomp: true) { |s| a << s } + a.should == ["hello ", 
"world"] + end + + it "removes only specified separator" do + a = [] + "hello world".send(@method, ' ', chomp: true) { |s| a << s } + a.should == ["hello", "world"] + end + + # https://bugs.ruby-lang.org/issues/14257 + it "ignores new line characters when separator is specified" do + a = [] + "hello\n world\n".send(@method, ' ', chomp: true) { |s| a << s } + a.should == ["hello\n", "world\n"] + + a = [] + "hello\r\n world\r\n".send(@method, ' ', chomp: true) { |s| a << s } + a.should == ["hello\r\n", "world\r\n"] + end + end +end diff --git a/spec/ruby/core/string/shared/each_line_without_block.rb b/spec/ruby/core/string/shared/each_line_without_block.rb new file mode 100644 index 0000000000..8e08b0390c --- /dev/null +++ b/spec/ruby/core/string/shared/each_line_without_block.rb @@ -0,0 +1,17 @@ +describe :string_each_line_without_block, shared: true do + describe "when no block is given" do + it "returns an enumerator" do + enum = "hello world".send(@method, ' ') + enum.should be_an_instance_of(Enumerator) + enum.to_a.should == ["hello ", "world"] + end + + describe "returned Enumerator" do + describe "size" do + it "should return nil" do + "hello world".send(@method, ' ').size.should == nil + end + end + end + end +end diff --git a/spec/ruby/core/string/shared/encode.rb b/spec/ruby/core/string/shared/encode.rb new file mode 100644 index 0000000000..9466308886 --- /dev/null +++ b/spec/ruby/core/string/shared/encode.rb @@ -0,0 +1,432 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +describe :string_encode, shared: true do + describe "when passed no options" do + it "transcodes to Encoding.default_internal when set" do + Encoding.default_internal = Encoding::UTF_8 + str = [0xA4, 0xA2].pack('CC').force_encoding Encoding::EUC_JP + str.send(@method).should == "あ" + end + + it "transcodes a 7-bit String despite no generic converting being available" do + -> do + Encoding::Converter.new Encoding::Emacs_Mule, Encoding::BINARY + end.should 
raise_error(Encoding::ConverterNotFoundError) + + Encoding.default_internal = Encoding::Emacs_Mule + str = "\x79".force_encoding Encoding::BINARY + + str.send(@method).should == "y".force_encoding(Encoding::BINARY) + end + + it "raises an Encoding::ConverterNotFoundError when no conversion is possible" do + Encoding.default_internal = Encoding::Emacs_Mule + str = [0x80].pack('C').force_encoding Encoding::BINARY + -> { str.send(@method) }.should raise_error(Encoding::ConverterNotFoundError) + end + end + + describe "when passed to encoding" do + it "accepts a String argument" do + str = [0xA4, 0xA2].pack('CC').force_encoding Encoding::EUC_JP + str.send(@method, "utf-8").should == "あ" + end + + it "calls #to_str to convert the object to an Encoding" do + enc = mock("string encode encoding") + enc.should_receive(:to_str).and_return("utf-8") + + str = [0xA4, 0xA2].pack('CC').force_encoding Encoding::EUC_JP + str.send(@method, enc).should == "あ" + end + + it "transcodes to the passed encoding" do + str = [0xA4, 0xA2].pack('CC').force_encoding Encoding::EUC_JP + str.send(@method, Encoding::UTF_8).should == "あ" + end + + it "transcodes Japanese multibyte characters" do + str = "あいうえお" + str.send(@method, Encoding::ISO_2022_JP).should == + "\e\x24\x42\x24\x22\x24\x24\x24\x26\x24\x28\x24\x2A\e\x28\x42".force_encoding(Encoding::ISO_2022_JP) + end + + it "transcodes a 7-bit String despite no generic converting being available" do + -> do + Encoding::Converter.new Encoding::Emacs_Mule, Encoding::BINARY + end.should raise_error(Encoding::ConverterNotFoundError) + + str = "\x79".force_encoding Encoding::BINARY + str.send(@method, Encoding::Emacs_Mule).should == "y".force_encoding(Encoding::BINARY) + end + + it "raises an Encoding::ConverterNotFoundError when no conversion is possible" do + str = [0x80].pack('C').force_encoding Encoding::BINARY + -> do + str.send(@method, Encoding::Emacs_Mule) + end.should raise_error(Encoding::ConverterNotFoundError) + end + + it "raises an 
Encoding::ConverterNotFoundError for an invalid encoding" do + -> do + "abc".send(@method, "xyz") + end.should raise_error(Encoding::ConverterNotFoundError) + end + end + + describe "when passed options" do + it "does not process transcoding options if not transcoding" do + result = "あ\ufffdあ".send(@method, undef: :replace) + result.should == "あ\ufffdあ" + end + + it "calls #to_hash to convert the object" do + options = mock("string encode options") + options.should_receive(:to_hash).and_return({ undef: :replace }) + + result = "あ\ufffdあ".send(@method, **options) + result.should == "あ\ufffdあ" + end + + it "transcodes to Encoding.default_internal when set" do + Encoding.default_internal = Encoding::UTF_8 + str = [0xA4, 0xA2].pack('CC').force_encoding Encoding::EUC_JP + str.send(@method, invalid: :replace).should == "あ" + end + + it "raises an Encoding::ConverterNotFoundError when no conversion is possible despite 'invalid: :replace, undef: :replace'" do + Encoding.default_internal = Encoding::Emacs_Mule + str = [0x80].pack('C').force_encoding Encoding::BINARY + -> do + str.send(@method, invalid: :replace, undef: :replace) + end.should raise_error(Encoding::ConverterNotFoundError) + end + + it "replaces invalid characters when replacing Emacs-Mule encoded strings" do + got = [0x80].pack('C').force_encoding('Emacs-Mule').send(@method, invalid: :replace) + + got.should == "?".encode('Emacs-Mule') + end + end + + describe "when passed to, from" do + it "transcodes between the encodings ignoring the String encoding" do + str = "あ" + result = [0xA6, 0xD0, 0x8F, 0xAB, 0xE4, 0x8F, 0xAB, 0xB1].pack('C8') + result.force_encoding Encoding::EUC_JP + str.send(@method, "euc-jp", "ibm437").should == result + end + + it "calls #to_str to convert the from object to an Encoding" do + enc = mock("string encode encoding") + enc.should_receive(:to_str).and_return("ibm437") + + str = "あ" + result = [0xA6, 0xD0, 0x8F, 0xAB, 0xE4, 0x8F, 0xAB, 0xB1].pack('C8') + result.force_encoding 
Encoding::EUC_JP + + str.send(@method, "euc-jp", enc).should == result + end + end + + describe "when passed to, options" do + it "replaces undefined characters in the destination encoding" do + result = "あ?あ".send(@method, Encoding::EUC_JP, undef: :replace) + # testing for: "\xA4\xA2?\xA4\xA2" + xA4xA2 = [0xA4, 0xA2].pack('CC') + result.should == "#{xA4xA2}?#{xA4xA2}".force_encoding("euc-jp") + end + + it "replaces invalid characters in the destination encoding" do + xFF = [0xFF].pack('C').force_encoding('utf-8') + "ab#{xFF}c".send(@method, Encoding::ISO_8859_1, invalid: :replace).should == "ab?c" + end + + it "calls #to_hash to convert the options object" do + options = mock("string encode options") + options.should_receive(:to_hash).and_return({ undef: :replace }) + + result = "あ?あ".send(@method, Encoding::EUC_JP, **options) + xA4xA2 = [0xA4, 0xA2].pack('CC').force_encoding('utf-8') + result.should == "#{xA4xA2}?#{xA4xA2}".force_encoding("euc-jp") + end + end + + describe "when passed to, from, options" do + it "replaces undefined characters in the destination encoding" do + str = "あ?あ".force_encoding Encoding::BINARY + result = str.send(@method, "euc-jp", "utf-8", undef: :replace) + xA4xA2 = [0xA4, 0xA2].pack('CC').force_encoding('utf-8') + result.should == "#{xA4xA2}?#{xA4xA2}".force_encoding("euc-jp") + end + + it "replaces invalid characters in the destination encoding" do + xFF = [0xFF].pack('C').force_encoding('utf-8') + str = "ab#{xFF}c".force_encoding Encoding::BINARY + str.send(@method, "iso-8859-1", "utf-8", invalid: :replace).should == "ab?c" + end + + it "calls #to_str to convert the to object to an encoding" do + to = mock("string encode to encoding") + to.should_receive(:to_str).and_return("iso-8859-1") + + xFF = [0xFF].pack('C').force_encoding('utf-8') + str = "ab#{xFF}c".force_encoding Encoding::BINARY + str.send(@method, to, "utf-8", invalid: :replace).should == "ab?c" + end + + it "calls #to_str to convert the from object to an encoding" do + 
from = mock("string encode from encoding") + from.should_receive(:to_str).and_return("utf-8") + + xFF = [0xFF].pack('C').force_encoding('utf-8') + str = "ab#{xFF}c".force_encoding Encoding::BINARY + str.send(@method, "iso-8859-1", from, invalid: :replace).should == "ab?c" + end + + it "calls #to_hash to convert the options object" do + options = mock("string encode options") + options.should_receive(:to_hash).and_return({ invalid: :replace }) + + xFF = [0xFF].pack('C').force_encoding('utf-8') + str = "ab#{xFF}c".force_encoding Encoding::BINARY + str.send(@method, "iso-8859-1", "utf-8", **options).should == "ab?c" + end + end + + describe "given the fallback option" do + context "given a hash" do + it "looks up the replacement value from the hash" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: { "\ufffd" => "bar" }) + encoded.should == "Bbar" + end + + it "calls to_str on the returned value" do + obj = Object.new + obj.should_receive(:to_str).and_return("bar") + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: { "\ufffd" => obj }) + encoded.should == "Bbar" + end + + it "does not call to_s on the returned value" do + obj = Object.new + obj.should_not_receive(:to_s) + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: { "\ufffd" => obj }) + }.should raise_error(TypeError, "no implicit conversion of Object into String") + end + + it "raises an error if the key is not present in the hash" do + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: { "foo" => "bar" }) + }.should raise_error(Encoding::UndefinedConversionError, "U+FFFD from UTF-8 to US-ASCII") + end + + it "raises an error if the value is itself invalid" do + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: { "\ufffd" => "\uffee" }) + }.should raise_error(ArgumentError, "too big fallback string") + end + + it "uses the hash's default value if set" do + hash = {} + hash.default = "bar" + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: hash) + encoded.should == "Bbar" +
end + + it "uses the result of calling default_proc if set" do + hash = {} + hash.default_proc = -> _, _ { "bar" } + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: hash) + encoded.should == "Bbar" + end + end + + context "given an object inheriting from Hash" do + before do + klass = Class.new(Hash) + @hash_like = klass.new + @hash_like["\ufffd"] = "bar" + end + + it "looks up the replacement value from the object" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: @hash_like) + encoded.should == "Bbar" + end + end + + context "given an object responding to []" do + before do + klass = Class.new do + def [](c) = c.bytes.inspect + end + @hash_like = klass.new + end + + it "calls [] on the object, passing the invalid character" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: @hash_like) + encoded.should == "B[239, 191, 189]" + end + end + + context "given an object not responding to []" do + before do + @non_hash_like = Object.new + end + + it "raises an error" do + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: @non_hash_like) + }.should raise_error(Encoding::UndefinedConversionError, "U+FFFD from UTF-8 to US-ASCII") + end + end + + context "given a proc" do + it "calls the proc to get the replacement value, passing in the invalid character" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: proc { |c| c.bytes.inspect }) + encoded.should == "B[239, 191, 189]" + end + + it "calls to_str on the returned value" do + obj = Object.new + obj.should_receive(:to_str).and_return("bar") + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: proc { |c| obj }) + encoded.should == "Bbar" + end + + it "does not call to_s on the returned value" do + obj = Object.new + obj.should_not_receive(:to_s) + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: proc { |c| obj }) + }.should raise_error(TypeError, "no implicit conversion of Object into String") + end + + it "raises an error if the returned value is itself invalid" do + 
-> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: proc { |c| "\uffee" }) + }.should raise_error(ArgumentError, "too big fallback string") + end + end + + context "given a lambda" do + it "calls the lambda to get the replacement value, passing in the invalid character" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: -> c { c.bytes.inspect }) + encoded.should == "B[239, 191, 189]" + end + + it "calls to_str on the returned value" do + obj = Object.new + obj.should_receive(:to_str).and_return("bar") + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: -> c { obj }) + encoded.should == "Bbar" + end + + it "does not call to_s on the returned value" do + obj = Object.new + obj.should_not_receive(:to_s) + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: -> c { obj }) + }.should raise_error(TypeError, "no implicit conversion of Object into String") + end + + it "raises an error if the returned value is itself invalid" do + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: -> c { "\uffee" }) + }.should raise_error(ArgumentError, "too big fallback string") + end + end + + context "given a method" do + def replace(c) = c.bytes.inspect + def replace_bad(c) = "\uffee" + + def replace_to_str(c) + obj = Object.new + obj.should_receive(:to_str).and_return("bar") + obj + end + + def replace_to_s(c) + obj = Object.new + obj.should_not_receive(:to_s) + obj + end + + it "calls the method to get the replacement value, passing in the invalid character" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: method(:replace)) + encoded.should == "B[239, 191, 189]" + end + + it "calls to_str on the returned value" do + encoded = "B\ufffd".encode(Encoding::US_ASCII, fallback: method(:replace_to_str)) + encoded.should == "Bbar" + end + + it "does not call to_s on the returned value" do + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: method(:replace_to_s)) + }.should raise_error(TypeError, "no implicit conversion of Object into String") + end + + it
"raises an error if the returned value is itself invalid" do + -> { + "B\ufffd".encode(Encoding::US_ASCII, fallback: method(:replace_bad)) + }.should raise_error(ArgumentError, "too big fallback string") + end + end + end + + describe "given the xml: :text option" do + it "replaces all instances of '&' with '&'" do + '& and &'.send(@method, "UTF-8", xml: :text).should == '& and &' + end + + it "replaces all instances of '<' with '<'" do + '< and <'.send(@method, "UTF-8", xml: :text).should == '< and <' + end + + it "replaces all instances of '>' with '>'" do + '> and >'.send(@method, "UTF-8", xml: :text).should == '> and >' + end + + it "does not replace '\"'" do + '" and "'.send(@method, "UTF-8", xml: :text).should == '" and "' + end + + it "replaces undefined characters with their upper-case hexadecimal numeric character references" do + 'ürst'.send(@method, Encoding::US_ASCII, xml: :text).should == 'ürst' + end + end + + describe "given the xml: :attr option" do + it "surrounds the encoded text with double-quotes" do + 'abc'.send(@method, "UTF-8", xml: :attr).should == '"abc"' + end + + it "replaces all instances of '&' with '&'" do + '& and &'.send(@method, "UTF-8", xml: :attr).should == '"& and &"' + end + + it "replaces all instances of '<' with '<'" do + '< and <'.send(@method, "UTF-8", xml: :attr).should == '"< and <"' + end + + it "replaces all instances of '>' with '>'" do + '> and >'.send(@method, "UTF-8", xml: :attr).should == '"> and >"' + end + + it "replaces all instances of '\"' with '"'" do + '" and "'.send(@method, "UTF-8", xml: :attr).should == '"" and ""' + end + + it "replaces undefined characters with their upper-case hexadecimal numeric character references" do + 'ürst'.send(@method, Encoding::US_ASCII, xml: :attr).should == '"ürst"' + end + end + + it "raises ArgumentError if the value of the :xml option is not :text or :attr" do + -> { ''.send(@method, "UTF-8", xml: :other) }.should raise_error(ArgumentError) + end +end diff --git 
a/spec/ruby/core/string/shared/eql.rb b/spec/ruby/core/string/shared/eql.rb new file mode 100644 index 0000000000..d5af337d53 --- /dev/null +++ b/spec/ruby/core/string/shared/eql.rb @@ -0,0 +1,38 @@ +# encoding: binary +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' + +describe :string_eql_value, shared: true do + it "returns true if self <=> string returns 0" do + 'hello'.send(@method, 'hello').should be_true + end + + it "returns false if self <=> string does not return 0" do + "more".send(@method, "MORE").should be_false + "less".send(@method, "greater").should be_false + end + + it "ignores encoding difference of compatible string" do + "hello".dup.force_encoding("utf-8").send(@method, "hello".dup.force_encoding("iso-8859-1")).should be_true + end + + it "considers encoding difference of incompatible string" do + "\xff".dup.force_encoding("utf-8").send(@method, "\xff".dup.force_encoding("iso-8859-1")).should be_false + end + + it "considers encoding compatibility" do + "abcd".dup.force_encoding("utf-8").send(@method, "abcd".dup.force_encoding("utf-32le")).should be_false + end + + it "ignores subclass differences" do + a = "hello" + b = StringSpecs::MyString.new("hello") + + a.send(@method, b).should be_true + b.send(@method, a).should be_true + end + + it "returns true when comparing 2 empty strings but one is not ASCII-compatible" do + "".send(@method, "".dup.force_encoding('iso-2022-jp')).should == true + end +end diff --git a/spec/ruby/core/string/shared/equal_value.rb b/spec/ruby/core/string/shared/equal_value.rb new file mode 100644 index 0000000000..fccafb5821 --- /dev/null +++ b/spec/ruby/core/string/shared/equal_value.rb @@ -0,0 +1,29 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' + +describe :string_equal_value, shared: true do + it "returns false if obj does not respond to to_str" do + 'hello'.send(@method, 5).should be_false + not_supported_on :opal do + 'hello'.send(@method, 
:hello).should be_false + end + 'hello'.send(@method, mock('x')).should be_false + end + + it "returns obj == self if obj responds to to_str" do + obj = Object.new + + # String#== merely checks if #to_str is defined. It does + # not call it. + obj.stub!(:to_str) + + # Don't use @method for :== in `obj.should_receive(:==)` + obj.should_receive(:==).and_return(true) + + 'hello'.send(@method, obj).should be_true + end + + it "is not fooled by NUL characters" do + "abc\0def".send(@method, "abc\0xyz").should be_false + end +end diff --git a/spec/ruby/core/string/shared/grapheme_clusters.rb b/spec/ruby/core/string/shared/grapheme_clusters.rb new file mode 100644 index 0000000000..8b666868b1 --- /dev/null +++ b/spec/ruby/core/string/shared/grapheme_clusters.rb @@ -0,0 +1,16 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' + +describe :string_grapheme_clusters, shared: true do + it "passes each grapheme cluster in self to the given block" do + a = [] + # test string: ab[rainbow flag emoji][paw prints] + "ab\u{1f3f3}\u{fe0f}\u{200d}\u{1f308}\u{1F43E}".send(@method) { |c| a << c } + a.should == ['a', 'b', "\u{1f3f3}\u{fe0f}\u{200d}\u{1f308}", "\u{1F43E}"] + end + + it "returns self" do + s = StringSpecs::MyString.new "ab\u{1f3f3}\u{fe0f}\u{200d}\u{1f308}\u{1F43E}" + s.send(@method) {}.should equal(s) + end +end diff --git a/spec/ruby/core/string/shared/length.rb b/spec/ruby/core/string/shared/length.rb new file mode 100644 index 0000000000..ae572ba755 --- /dev/null +++ b/spec/ruby/core/string/shared/length.rb @@ -0,0 +1,55 @@ +# encoding: utf-8 + +describe :string_length, shared: true do + it "returns the length of self" do + "".send(@method).should == 0 + "\x00".send(@method).should == 1 + "one".send(@method).should == 3 + "two".send(@method).should == 3 + "three".send(@method).should == 5 + "four".send(@method).should == 4 + end + + it "returns the length of a string in different encodings" do + utf8_str = 'こにちわ' * 100 +
utf8_str.send(@method).should == 400 + utf8_str.encode(Encoding::UTF_32BE).send(@method).should == 400 + utf8_str.encode(Encoding::SHIFT_JIS).send(@method).should == 400 + end + + it "returns the length of the new self after encoding is changed" do + str = +'こにちわ' + str.send(@method) + + str.force_encoding('BINARY').send(@method).should == 12 + end + + it "returns the correct length after force_encoding(BINARY)" do + utf8 = "あ" + ascii = "a" + concat = utf8 + ascii + + concat.encoding.should == Encoding::UTF_8 + concat.bytesize.should == 4 + + concat.send(@method).should == 2 + concat.force_encoding(Encoding::ASCII_8BIT) + concat.send(@method).should == 4 + end + + it "adds 1 for every invalid byte in UTF-8" do + "\xF4\x90\x80\x80".send(@method).should == 4 + "a\xF4\x90\x80\x80b".send(@method).should == 6 + "é\xF4\x90\x80\x80è".send(@method).should == 6 + end + + it "adds 1 (and not 2) for a incomplete surrogate in UTF-16" do + "\x00\xd8".dup.force_encoding("UTF-16LE").send(@method).should == 1 + "\xd8\x00".dup.force_encoding("UTF-16BE").send(@method).should == 1 + end + + it "adds 1 for a broken sequence in UTF-32" do + "\x04\x03\x02\x01".dup.force_encoding("UTF-32LE").send(@method).should == 1 + "\x01\x02\x03\x04".dup.force_encoding("UTF-32BE").send(@method).should == 1 + end +end diff --git a/spec/ruby/core/string/shared/partition.rb b/spec/ruby/core/string/shared/partition.rb new file mode 100644 index 0000000000..4cac149ce5 --- /dev/null +++ b/spec/ruby/core/string/shared/partition.rb @@ -0,0 +1,33 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' + +describe :string_partition, shared: true do + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("hello").send(@method, "l").each do |item| + item.should be_an_instance_of(String) + end + + StringSpecs::MyString.new("hello").send(@method, "x").each do |item| + item.should be_an_instance_of(String) + end + + 
StringSpecs::MyString.new("hello").send(@method, /l./).each do |item| + item.should be_an_instance_of(String) + end + end + + it "returns before- and after- parts in the same encoding as self" do + strings = "hello".encode("US-ASCII").send(@method, "ello") + strings[0].encoding.should == Encoding::US_ASCII + strings[2].encoding.should == Encoding::US_ASCII + + strings = "hello".encode("US-ASCII").send(@method, /ello/) + strings[0].encoding.should == Encoding::US_ASCII + strings[2].encoding.should == Encoding::US_ASCII + end + + it "returns the matching part in the separator's encoding" do + strings = "hello".encode("US-ASCII").send(@method, "ello") + strings[1].encoding.should == Encoding::UTF_8 + end +end diff --git a/spec/ruby/core/string/shared/replace.rb b/spec/ruby/core/string/shared/replace.rb new file mode 100644 index 0000000000..24dac0eb27 --- /dev/null +++ b/spec/ruby/core/string/shared/replace.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: false +describe :string_replace, shared: true do + it "returns self" do + a = "a" + a.send(@method, "b").should equal(a) + end + + it "replaces the content of self with other" do + a = "some string" + a.send(@method, "another string") + a.should == "another string" + end + + it "replaces the encoding of self with that of other" do + a = "".encode("UTF-16LE") + b = "".encode("UTF-8") + a.send(@method, b) + a.encoding.should == Encoding::UTF_8 + end + + it "carries over the encoding invalidity" do + a = "\u{8765}".force_encoding('ascii') + "".send(@method, a).valid_encoding?.should be_false + end + + it "tries to convert other to string using to_str" do + other = mock('x') + other.should_receive(:to_str).and_return("converted to a string") + "hello".send(@method, other).should == "converted to a string" + end + + it "raises a TypeError if other can't be converted to string" do + -> { "hello".send(@method, 123) }.should raise_error(TypeError) + -> { "hello".send(@method, []) }.should raise_error(TypeError) + -> { 
"hello".send(@method, mock('x')) }.should raise_error(TypeError) + end + + it "raises a FrozenError on a frozen instance that is modified" do + a = "hello".freeze + -> { a.send(@method, "world") }.should raise_error(FrozenError) + end + + # see [ruby-core:23666] + it "raises a FrozenError on a frozen instance when self-replacing" do + a = "hello".freeze + -> { a.send(@method, a) }.should raise_error(FrozenError) + end +end diff --git a/spec/ruby/core/string/shared/slice.rb b/spec/ruby/core/string/shared/slice.rb new file mode 100644 index 0000000000..7b9b9f6a14 --- /dev/null +++ b/spec/ruby/core/string/shared/slice.rb @@ -0,0 +1,517 @@ +describe :string_slice, shared: true do + it "returns the character code of the character at the given index" do + "hello".send(@method, 0).should == ?h + "hello".send(@method, -1).should == ?o + end + + it "returns nil if index is outside of self" do + "hello".send(@method, 20).should == nil + "hello".send(@method, -20).should == nil + + "".send(@method, 0).should == nil + "".send(@method, -1).should == nil + end + + it "calls to_int on the given index" do + "hello".send(@method, 0.5).should == ?h + + obj = mock('1') + obj.should_receive(:to_int).and_return(1) + "hello".send(@method, obj).should == ?e + end + + it "raises a TypeError if the given index is nil" do + -> { "hello".send(@method, nil) }.should raise_error(TypeError) + end + + it "raises a TypeError if the given index can't be converted to an Integer" do + -> { "hello".send(@method, mock('x')) }.should raise_error(TypeError) + -> { "hello".send(@method, {}) }.should raise_error(TypeError) + -> { "hello".send(@method, []) }.should raise_error(TypeError) + end + + it "raises a RangeError if the index is too big" do + -> { "hello".send(@method, bignum_value) }.should raise_error(RangeError) + end +end + +describe :string_slice_index_length, shared: true do + it "returns the substring starting at the given index with the given length" do + "hello there".send(@method, 
0,0).should == "" + "hello there".send(@method, 0,1).should == "h" + "hello there".send(@method, 0,3).should == "hel" + "hello there".send(@method, 0,6).should == "hello " + "hello there".send(@method, 0,9).should == "hello the" + "hello there".send(@method, 0,12).should == "hello there" + + "hello there".send(@method, 1,0).should == "" + "hello there".send(@method, 1,1).should == "e" + "hello there".send(@method, 1,3).should == "ell" + "hello there".send(@method, 1,6).should == "ello t" + "hello there".send(@method, 1,9).should == "ello ther" + "hello there".send(@method, 1,12).should == "ello there" + + "hello there".send(@method, 3,0).should == "" + "hello there".send(@method, 3,1).should == "l" + "hello there".send(@method, 3,3).should == "lo " + "hello there".send(@method, 3,6).should == "lo the" + "hello there".send(@method, 3,9).should == "lo there" + + "hello there".send(@method, 4,0).should == "" + "hello there".send(@method, 4,3).should == "o t" + "hello there".send(@method, 4,6).should == "o ther" + "hello there".send(@method, 4,9).should == "o there" + + "foo".send(@method, 2,1).should == "o" + "foo".send(@method, 3,0).should == "" + "foo".send(@method, 3,1).should == "" + + "".send(@method, 0,0).should == "" + "".send(@method, 0,1).should == "" + + "x".send(@method, 0,0).should == "" + "x".send(@method, 0,1).should == "x" + "x".send(@method, 1,0).should == "" + "x".send(@method, 1,1).should == "" + + "x".send(@method, -1,0).should == "" + "x".send(@method, -1,1).should == "x" + + "hello there".send(@method, -3,2).should == "er" + end + + it "returns a string with the same encoding as self" do + s = "hello there" + s.send(@method, 1, 9).encoding.should == s.encoding + + a = "hello".dup.force_encoding("binary") + b = " there".dup.force_encoding("ISO-8859-1") + c = (a + b).force_encoding(Encoding::US_ASCII) + + c.send(@method, 0, 5).encoding.should == Encoding::US_ASCII + c.send(@method, 5, 6).encoding.should == Encoding::US_ASCII + c.send(@method, 1, 
3).encoding.should == Encoding::US_ASCII + c.send(@method, 8, 2).encoding.should == Encoding::US_ASCII + c.send(@method, 1, 10).encoding.should == Encoding::US_ASCII + end + + it "returns nil if the offset falls outside of self" do + "hello there".send(@method, 20,3).should == nil + "hello there".send(@method, -20,3).should == nil + + "".send(@method, 1,0).should == nil + "".send(@method, 1,1).should == nil + + "".send(@method, -1,0).should == nil + "".send(@method, -1,1).should == nil + + "x".send(@method, 2,0).should == nil + "x".send(@method, 2,1).should == nil + + "x".send(@method, -2,0).should == nil + "x".send(@method, -2,1).should == nil + + "x".send(@method, fixnum_max, 1).should == nil + end + + it "returns nil if the length is negative" do + "hello there".send(@method, 4,-3).should == nil + "hello there".send(@method, -4,-3).should == nil + end + + platform_is pointer_size: 64 do + it "returns nil if the length is negative big value" do + "hello there".send(@method, 4, -(1 << 31)).should == nil + + # by some reason length < -(1 << 31) on CI on Windows leads to + # 'RangeError: bignum too big to convert into `long'' error + platform_is_not :windows do + "hello there".send(@method, 4, -(1 << 63)).should == nil + end + end + end + + it "calls to_int on the given index and the given length" do + "hello".send(@method, 0.5, 1).should == "h" + "hello".send(@method, 0.5, 2.5).should == "he" + "hello".send(@method, 1, 2.5).should == "el" + + obj = mock('2') + obj.should_receive(:to_int).exactly(4).times.and_return(2) + + "hello".send(@method, obj, 1).should == "l" + "hello".send(@method, obj, obj).should == "ll" + "hello".send(@method, 0, obj).should == "he" + end + + it "raises a TypeError when idx or length can't be converted to an integer" do + -> { "hello".send(@method, mock('x'), 0) }.should raise_error(TypeError) + -> { "hello".send(@method, 0, mock('x')) }.should raise_error(TypeError) + + # I'm deliberately including this here. 
+ # It means that str.send(@method, other, idx) isn't supported. + -> { "hello".send(@method, "", 0) }.should raise_error(TypeError) + end + + it "raises a TypeError when the given index or the given length is nil" do + -> { "hello".send(@method, 1, nil) }.should raise_error(TypeError) + -> { "hello".send(@method, nil, 1) }.should raise_error(TypeError) + -> { "hello".send(@method, nil, nil) }.should raise_error(TypeError) + end + + it "raises a RangeError if the index or length is too big" do + -> { "hello".send(@method, bignum_value, 1) }.should raise_error(RangeError) + -> { "hello".send(@method, 0, bignum_value) }.should raise_error(RangeError) + end + + it "raises a RangeError if the index or length is too small" do + -> { "hello".send(@method, -bignum_value, 1) }.should raise_error(RangeError) + -> { "hello".send(@method, 0, -bignum_value) }.should raise_error(RangeError) + end + + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.send(@method, 0,0).should be_an_instance_of(String) + s.send(@method, 0,4).should be_an_instance_of(String) + s.send(@method, 1,4).should be_an_instance_of(String) + end + + it "handles repeated application" do + "hello world".send(@method, 6, 5).send(@method, 0, 1).should == 'w' + "hello world".send(@method, 6, 5).send(@method, 0, 5).should == 'world' + + "hello world".send(@method, 6, 5).send(@method, 1, 1).should == 'o' + "hello world".send(@method, 6, 5).send(@method, 1, 4).should == 'orld' + + "hello world".send(@method, 6, 5).send(@method, 4, 1).should == 'd' + "hello world".send(@method, 6, 5).send(@method, 5, 0).should == '' + + "hello world".send(@method, 6, 0).send(@method, -1, 0).should == nil + "hello world".send(@method, 6, 0).send(@method, 1, 1).should == nil + end +end + +describe :string_slice_range, shared: true do + it "returns the substring given by the offsets of the range" do + "hello there".send(@method, 1..1).should == "e" + "hello there".send(@method, 1..3).should == "ell" + "hello 
there".send(@method, 1...3).should == "el" + "hello there".send(@method, -4..-2).should == "her" + "hello there".send(@method, -4...-2).should == "he" + "hello there".send(@method, 5..-1).should == " there" + "hello there".send(@method, 5...-1).should == " ther" + + "".send(@method, 0..0).should == "" + + "x".send(@method, 0..0).should == "x" + "x".send(@method, 0..1).should == "x" + "x".send(@method, 0...1).should == "x" + "x".send(@method, 0..-1).should == "x" + + "x".send(@method, 1..1).should == "" + "x".send(@method, 1..-1).should == "" + end + + it "returns a String in the same encoding as self" do + "hello there".encode("US-ASCII").send(@method, 1..1).encoding.should == Encoding::US_ASCII + end + + it "returns nil if the beginning of the range falls outside of self" do + "hello there".send(@method, 12..-1).should == nil + "hello there".send(@method, 20..25).should == nil + "hello there".send(@method, 20..1).should == nil + "hello there".send(@method, -20..1).should == nil + "hello there".send(@method, -20..-1).should == nil + + "".send(@method, -1..-1).should == nil + "".send(@method, -1...-1).should == nil + "".send(@method, -1..0).should == nil + "".send(@method, -1...0).should == nil + end + + it "returns an empty string if range.begin is inside self and > real end" do + "hello there".send(@method, 1...1).should == "" + "hello there".send(@method, 4..2).should == "" + "hello".send(@method, 4..-4).should == "" + "hello there".send(@method, -5..-6).should == "" + "hello there".send(@method, -2..-4).should == "" + "hello there".send(@method, -5..-6).should == "" + "hello there".send(@method, -5..2).should == "" + + "".send(@method, 0...0).should == "" + "".send(@method, 0..-1).should == "" + "".send(@method, 0...-1).should == "" + + "x".send(@method, 0...0).should == "" + "x".send(@method, 0...-1).should == "" + "x".send(@method, 1...1).should == "" + "x".send(@method, 1...-1).should == "" + end + + it "returns String instances" do + s = 
StringSpecs::MyString.new("hello") + s.send(@method, 0...0).should be_an_instance_of(String) + s.send(@method, 0..4).should be_an_instance_of(String) + s.send(@method, 1..4).should be_an_instance_of(String) + end + + it "calls to_int on range arguments" do + from = mock('from') + to = mock('to') + + # So we can construct a range out of them... + from.should_receive(:<=>).twice.and_return(0) + + from.should_receive(:to_int).twice.and_return(1) + to.should_receive(:to_int).twice.and_return(-2) + + "hello there".send(@method, from..to).should == "ello ther" + "hello there".send(@method, from...to).should == "ello the" + end + + it "works with Range subclasses" do + a = "GOOD" + range_incl = StringSpecs::MyRange.new(1, 2) + range_excl = StringSpecs::MyRange.new(-3, -1, true) + + a.send(@method, range_incl).should == "OO" + a.send(@method, range_excl).should == "OO" + end + + it "handles repeated application" do + "hello world".send(@method, 6..11).send(@method, 0..0).should == 'w' + "hello world".send(@method, 6..11).send(@method, 0..4).should == 'world' + + "hello world".send(@method, 6..11).send(@method, 1..1).should == 'o' + "hello world".send(@method, 6..11).send(@method, 1..4).should == 'orld' + + "hello world".send(@method, 6..11).send(@method, 4..4).should == 'd' + "hello world".send(@method, 6..11).send(@method, 5..4).should == '' + + "hello world".send(@method, 6..5).send(@method, -1..-1).should == nil + "hello world".send(@method, 6..5).send(@method, 1..1).should == nil + end + + it "raises a type error if a range is passed with a length" do + ->{ "hello".send(@method, 1..2, 1) }.should raise_error(TypeError) + end + + it "raises a RangeError if one of the bound is too big" do + -> { "hello".send(@method, bignum_value..(bignum_value + 1)) }.should raise_error(RangeError) + -> { "hello".send(@method, 0..bignum_value) }.should raise_error(RangeError) + end + + it "works with endless ranges" do + "hello there".send(@method, eval("(2..)")).should == "llo there" + 
"hello there".send(@method, eval("(2...)")).should == "llo there" + "hello there".send(@method, eval("(-4..)")).should == "here" + "hello there".send(@method, eval("(-4...)")).should == "here" + end + + it "works with beginless ranges" do + "hello there".send(@method, (..5)).should == "hello " + "hello there".send(@method, (...5)).should == "hello" + "hello there".send(@method, (..-4)).should == "hello th" + "hello there".send(@method, (...-4)).should == "hello t" + "hello there".send(@method, (...nil)).should == "hello there" + end +end + +describe :string_slice_regexp, shared: true do + it "returns the matching portion of self" do + "hello there".send(@method, /[aeiou](.)\1/).should == "ell" + "".send(@method, //).should == "" + end + + it "returns nil if there is no match" do + "hello there".send(@method, /xyz/).should == nil + end + + it "returns a String in the same encoding as self" do + "hello there".encode("US-ASCII").send(@method, /[aeiou](.)\1/).encoding.should == Encoding::US_ASCII + end + + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.send(@method, //).should be_an_instance_of(String) + s.send(@method, /../).should be_an_instance_of(String) + end + + it "sets $~ to MatchData when there is a match and nil when there's none" do + 'hello'.send(@method, /./) + $~[0].should == 'h' + + 'hello'.send(@method, /not/) + $~.should == nil + end +end + +describe :string_slice_regexp_index, shared: true do + it "returns the capture for the given index" do + "hello there".send(@method, /[aeiou](.)\1/, 0).should == "ell" + "hello there".send(@method, /[aeiou](.)\1/, 1).should == "l" + "hello there".send(@method, /[aeiou](.)\1/, -1).should == "l" + + "har".send(@method, /(.)(.)(.)/, 0).should == "har" + "har".send(@method, /(.)(.)(.)/, 1).should == "h" + "har".send(@method, /(.)(.)(.)/, 2).should == "a" + "har".send(@method, /(.)(.)(.)/, 3).should == "r" + "har".send(@method, /(.)(.)(.)/, -1).should == "r" + "har".send(@method, 
/(.)(.)(.)/, -2).should == "a" + "har".send(@method, /(.)(.)(.)/, -3).should == "h" + end + + it "returns nil if there is no match" do + "hello there".send(@method, /(what?)/, 1).should == nil + end + + it "returns nil if the index is larger than the number of captures" do + "hello there".send(@method, /hello (.)/, 2).should == nil + # You can't refer to 0 using negative indices + "hello there".send(@method, /hello (.)/, -2).should == nil + end + + it "returns nil if there is no capture for the given index" do + "hello there".send(@method, /[aeiou](.)\1/, 2).should == nil + end + + it "returns nil if the given capture group was not matched but still sets $~" do + "test".send(@method, /te(z)?/, 1).should == nil + $~[0].should == "te" + $~[1].should == nil + end + + it "returns a String in the same encoding as self" do + "hello there".encode("US-ASCII").send(@method, /[aeiou](.)\1/, 0).encoding.should == Encoding::US_ASCII + end + + it "calls to_int on the given index" do + obj = mock('2') + obj.should_receive(:to_int).and_return(2) + + "har".send(@method, /(.)(.)(.)/, 1.5).should == "h" + "har".send(@method, /(.)(.)(.)/, obj).should == "a" + end + + it "raises a TypeError when the given index can't be converted to Integer" do + -> { "hello".send(@method, /(.)(.)(.)/, mock('x')) }.should raise_error(TypeError) + -> { "hello".send(@method, /(.)(.)(.)/, {}) }.should raise_error(TypeError) + -> { "hello".send(@method, /(.)(.)(.)/, []) }.should raise_error(TypeError) + end + + it "raises a TypeError when the given index is nil" do + -> { "hello".send(@method, /(.)(.)(.)/, nil) }.should raise_error(TypeError) + end + + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.send(@method, /(.)(.)/, 0).should be_an_instance_of(String) + s.send(@method, /(.)(.)/, 1).should be_an_instance_of(String) + end + + it "sets $~ to MatchData when there is a match and nil when there's none" do + 'hello'.send(@method, /.(.)/, 0) + $~[0].should == 'he' + + 
'hello'.send(@method, /.(.)/, 1) + $~[1].should == 'e' + + 'hello'.send(@method, /not/, 0) + $~.should == nil + end +end + +describe :string_slice_string, shared: true do + it "returns other_str if it occurs in self" do + s = "lo" + "hello there".send(@method, s).should == s + end + + it "doesn't set $~" do + $~ = nil + + 'hello'.send(@method, 'll') + $~.should == nil + end + + it "returns nil if there is no match" do + "hello there".send(@method, "bye").should == nil + end + + it "doesn't call to_str on its argument" do + o = mock('x') + o.should_not_receive(:to_str) + + -> { "hello".send(@method, o) }.should raise_error(TypeError) + end + + it "returns a String instance when given a subclass instance" do + s = StringSpecs::MyString.new("el") + r = "hello".send(@method, s) + r.should == "el" + r.should be_an_instance_of(String) + end +end + +describe :string_slice_regexp_group, shared: true do + not_supported_on :opal do + it "returns the capture for the given name" do + "hello there".send(@method, /(?<g>[aeiou](.))/, 'g').should == "el" + "hello there".send(@method, /[aeiou](?<g>.)/, 'g').should == "l" + + "har".send(@method, /(?<g>(.)(.)(.))/, 'g').should == "har" + "har".send(@method, /(?<h>.)(.)(.)/, 'h').should == "h" + "har".send(@method, /(.)(?<a>.)(.)/, 'a').should == "a" + "har".send(@method, /(.)(.)(?<r>.)/, 'r').should == "r" + "har".send(@method, /(?<h>.)(?<a>.)(?<r>.)/, 'r').should == "r" + end + + it "returns the last capture for duplicate names" do + "hello there".send(@method, /(?<g>h)(?<g>.)/, 'g').should == "e" + "hello there".send(@method, /(?<g>h)(?<g>.)(?<f>.)/, 'g').should == "e" + end + + it "returns the innermost capture for nested duplicate names" do + "hello there".send(@method, /(?<g>h(?<g>.))/, 'g').should == "e" + end + + it "returns nil if there is no match" do + "hello there".send(@method, /(?<whut>what?)/, 'whut').should be_nil + end + + it "raises an IndexError if there is no capture for the given name" do + -> do + "hello 
there".send(@method, /[aeiou](.)\1/, 'non') + end.should raise_error(IndexError) + end + + it "raises a TypeError when the given name is not a String" do + -> { "hello".send(@method, /(?<q>.)/, mock('x')) }.should raise_error(TypeError) + -> { "hello".send(@method, /(?<q>.)/, {}) }.should raise_error(TypeError) + -> { "hello".send(@method, /(?<q>.)/, []) }.should raise_error(TypeError) + end + + it "raises an IndexError when given the empty String as a group name" do + -> { "hello".send(@method, /(?<q>)/, '') }.should raise_error(IndexError) + end + + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.send(@method, /(?<q>.)/, 'q').should be_an_instance_of(String) + end + + it "sets $~ to MatchData when there is a match and nil when there's none" do + 'hello'.send(@method, /(?<hi>.(.))/, 'hi') + $~[0].should == 'he' + + 'hello'.send(@method, /(?<non>not)/, 'non') + $~.should be_nil + end + end +end + +describe :string_slice_symbol, shared: true do + it "raises TypeError" do + -> { 'hello'.send(@method, :hello) }.should raise_error(TypeError) + end +end diff --git a/spec/ruby/core/string/shared/strip.rb b/spec/ruby/core/string/shared/strip.rb new file mode 100644 index 0000000000..3af77b50fe --- /dev/null +++ b/spec/ruby/core/string/shared/strip.rb @@ -0,0 +1,14 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' + +describe :string_strip, shared: true do + it "returns a String in the same encoding as self" do + " hello ".encode("US-ASCII").send(@method).encoding.should == Encoding::US_ASCII + end + + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new(" hello ").send(@method).should be_an_instance_of(String) + StringSpecs::MyString.new(" ").send(@method).should be_an_instance_of(String) + StringSpecs::MyString.new("").send(@method).should be_an_instance_of(String) + end +end diff --git a/spec/ruby/core/string/shared/succ.rb b/spec/ruby/core/string/shared/succ.rb new file 
mode 100644 index 0000000000..7c68345f10 --- /dev/null +++ b/spec/ruby/core/string/shared/succ.rb @@ -0,0 +1,87 @@ +# encoding: binary +describe :string_succ, shared: true do + it "returns an empty string for empty strings" do + "".send(@method).should == "" + end + + it "returns the successor by increasing the rightmost alphanumeric (digit => digit, letter => letter with same case)" do + "abcd".send(@method).should == "abce" + "THX1138".send(@method).should == "THX1139" + + "<<koala>>".send(@method).should == "<<koalb>>" + "==A??".send(@method).should == "==B??" + end + + it "increases non-alphanumerics (via ascii rules) if there are no alphanumerics" do + "***".send(@method).should == "**+" + "**`".send(@method).should == "**a" + end + + it "increases the next best alphanumeric (jumping over non-alphanumerics) if there is a carry" do + "dz".send(@method).should == "ea" + "HZ".send(@method).should == "IA" + "49".send(@method).should == "50" + + "izz".send(@method).should == "jaa" + "IZZ".send(@method).should == "JAA" + "699".send(@method).should == "700" + + "6Z99z99Z".send(@method).should == "7A00a00A" + + "1999zzz".send(@method).should == "2000aaa" + "NZ/[]ZZZ9999".send(@method).should == "OA/[]AAA0000" + end + + it "increases the next best character if there is a carry for non-alphanumerics" do + "(\xFF".send(@method).should == ")\x00" + "`\xFF".send(@method).should == "a\x00" + "<\xFF\xFF".send(@method).should == "=\x00\x00" + end + + it "adds an additional character (just left to the last increased one) if there is a carry and no character left to increase" do + "z".send(@method).should == "aa" + "Z".send(@method).should == "AA" + "9".send(@method).should == "10" + + "zz".send(@method).should == "aaa" + "ZZ".send(@method).should == "AAA" + "99".send(@method).should == "100" + + "9Z99z99Z".send(@method).should == "10A00a00A" + + "ZZZ9999".send(@method).should == "AAAA0000" + "/[]9999".send(@method).should == "/[]10000" + "/[]ZZZ9999".send(@method).should == 
"/[]AAAA0000" + "Z/[]ZZZ9999".send(@method).should == "AA/[]AAA0000" + + # non-alphanumeric cases + "\xFF".send(@method).should == "\x01\x00" + "\xFF\xFF".send(@method).should == "\x01\x00\x00" + end + + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("").send(@method).should be_an_instance_of(String) + StringSpecs::MyString.new("a").send(@method).should be_an_instance_of(String) + StringSpecs::MyString.new("z").send(@method).should be_an_instance_of(String) + end + + it "returns a String in the same encoding as self" do + "z".encode("US-ASCII").send(@method).encoding.should == Encoding::US_ASCII + end +end + +describe :string_succ_bang, shared: true do + it "is equivalent to succ, but modifies self in place (still returns self)" do + ["", "abcd", "THX1138"].each do |s| + s = +s + r = s.dup.send(@method) + s.send(@method).should equal(s) + s.should == r + end + end + + it "raises a FrozenError if self is frozen" do + -> { "".freeze.send(@method) }.should raise_error(FrozenError) + -> { "abcd".freeze.send(@method) }.should raise_error(FrozenError) + end +end diff --git a/spec/ruby/core/string/shared/to_s.rb b/spec/ruby/core/string/shared/to_s.rb new file mode 100644 index 0000000000..4b87a6cbe1 --- /dev/null +++ b/spec/ruby/core/string/shared/to_s.rb @@ -0,0 +1,13 @@ +describe :string_to_s, shared: true do + it "returns self when self.class == String" do + a = "a string" + a.should equal(a.send(@method)) + end + + it "returns a new instance of String when called on a subclass" do + a = StringSpecs::MyString.new("a string") + s = a.send(@method) + s.should == "a string" + s.should be_an_instance_of(String) + end +end diff --git a/spec/ruby/core/string/shared/to_sym.rb b/spec/ruby/core/string/shared/to_sym.rb new file mode 100644 index 0000000000..833eae100e --- /dev/null +++ b/spec/ruby/core/string/shared/to_sym.rb @@ -0,0 +1,72 @@ +describe :string_to_sym, shared: true do + it "returns the symbol corresponding to self" do + 
"Koala".send(@method).should equal :Koala + 'cat'.send(@method).should equal :cat + '@cat'.send(@method).should equal :@cat + 'cat and dog'.send(@method).should equal :"cat and dog" + "abc=".send(@method).should equal :abc= + end + + it "does not special case +(binary) and -(binary)" do + "+(binary)".send(@method).should equal :"+(binary)" + "-(binary)".send(@method).should equal :"-(binary)" + end + + it "does not special case certain operators" do + "!@".send(@method).should equal :"!@" + "~@".send(@method).should equal :"~@" + "!(unary)".send(@method).should equal :"!(unary)" + "~(unary)".send(@method).should equal :"~(unary)" + "+(unary)".send(@method).should equal :"+(unary)" + "-(unary)".send(@method).should equal :"-(unary)" + end + + it "returns a US-ASCII Symbol for a UTF-8 String containing only US-ASCII characters" do + sym = "foobar".send(@method) + sym.encoding.should == Encoding::US_ASCII + sym.should equal :"foobar" + end + + it "returns a US-ASCII Symbol for a binary String containing only US-ASCII characters" do + sym = "foobar".b.send(@method) + sym.encoding.should == Encoding::US_ASCII + sym.should equal :"foobar" + end + + it "returns a UTF-8 Symbol for a UTF-8 String containing non US-ASCII characters" do + sym = "il était une fois".send(@method) + sym.encoding.should == Encoding::UTF_8 + sym.should equal :"il était une #{'fois'}" + end + + it "returns a UTF-16LE Symbol for a UTF-16LE String containing non US-ASCII characters" do + utf16_str = "UtéF16".encode(Encoding::UTF_16LE) + sym = utf16_str.send(@method) + sym.encoding.should == Encoding::UTF_16LE + sym.to_s.should == utf16_str + end + + it "returns a binary Symbol for a binary String containing non US-ASCII characters" do + binary_string = "binarí".b + sym = binary_string.send(@method) + sym.encoding.should == Encoding::BINARY + sym.to_s.should == binary_string + end + + it "ignores existing symbols with different encoding" do + source = "fée" + + iso_symbol = 
source.dup.force_encoding(Encoding::ISO_8859_1).send(@method) + iso_symbol.encoding.should == Encoding::ISO_8859_1 + binary_symbol = source.dup.force_encoding(Encoding::BINARY).send(@method) + binary_symbol.encoding.should == Encoding::BINARY + end + + it "raises an EncodingError for UTF-8 String containing invalid bytes" do + invalid_utf8 = "\xC3" + invalid_utf8.should_not.valid_encoding? + -> { + invalid_utf8.send(@method) + }.should raise_error(EncodingError, 'invalid symbol in encoding UTF-8 :"\xC3"') + end +end diff --git a/spec/ruby/core/string/size_spec.rb b/spec/ruby/core/string/size_spec.rb new file mode 100644 index 0000000000..9e1f40c5ae --- /dev/null +++ b/spec/ruby/core/string/size_spec.rb @@ -0,0 +1,7 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/length' + +describe "String#size" do + it_behaves_like :string_length, :size +end diff --git a/spec/ruby/core/string/slice_spec.rb b/spec/ruby/core/string/slice_spec.rb new file mode 100644 index 0000000000..5aba2d3be0 --- /dev/null +++ b/spec/ruby/core/string/slice_spec.rb @@ -0,0 +1,390 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/slice' + +describe "String#slice" do + it_behaves_like :string_slice, :slice +end + +describe "String#slice with index, length" do + it_behaves_like :string_slice_index_length, :slice +end + +describe "String#slice with Range" do + it_behaves_like :string_slice_range, :slice +end + +describe "String#slice with Regexp" do + it_behaves_like :string_slice_regexp, :slice +end + +describe "String#slice with Regexp, index" do + it_behaves_like :string_slice_regexp_index, :slice +end + +describe "String#slice with Regexp, group" do + it_behaves_like :string_slice_regexp_group, :slice +end + +describe "String#slice with String" do + it_behaves_like :string_slice_string, :slice +end + +describe "String#slice with 
Symbol" do + it_behaves_like :string_slice_symbol, :slice +end + +describe "String#slice! with index" do + it "deletes and return the char at the given position" do + a = "hello" + a.slice!(1).should == ?e + a.should == "hllo" + a.slice!(-1).should == ?o + a.should == "hll" + end + + it "returns nil if idx is outside of self" do + a = "hello" + a.slice!(20).should == nil + a.should == "hello" + a.slice!(-20).should == nil + a.should == "hello" + end + + it "raises a FrozenError if self is frozen" do + -> { "hello".freeze.slice!(1) }.should raise_error(FrozenError) + -> { "hello".freeze.slice!(10) }.should raise_error(FrozenError) + -> { "".freeze.slice!(0) }.should raise_error(FrozenError) + end + + it "calls to_int on index" do + "hello".slice!(0.5).should == ?h + + obj = mock('1') + obj.should_receive(:to_int).at_least(1).and_return(1) + "hello".slice!(obj).should == ?e + + obj = mock('1') + obj.should_receive(:respond_to?).at_least(1).with(:to_int, true).and_return(true) + obj.should_receive(:method_missing).at_least(1).with(:to_int).and_return(1) + "hello".slice!(obj).should == ?e + end + + + it "returns the character given by the character index" do + "hellö there".slice!(1).should == "e" + "hellö there".slice!(4).should == "ö" + "hellö there".slice!(6).should == "t" + end + +end + +describe "String#slice! 
with index, length" do + it "deletes and returns the substring at idx and the given length" do + a = "hello" + a.slice!(1, 2).should == "el" + a.should == "hlo" + + a.slice!(1, 0).should == "" + a.should == "hlo" + + a.slice!(-2, 4).should == "lo" + a.should == "h" + end + + it "returns nil if the given position is out of self" do + a = "hello" + a.slice(10, 3).should == nil + a.should == "hello" + + a.slice(-10, 20).should == nil + a.should == "hello" + end + + it "returns nil if the length is negative" do + a = "hello" + a.slice(4, -3).should == nil + a.should == "hello" + end + + it "raises a FrozenError if self is frozen" do + -> { "hello".freeze.slice!(1, 2) }.should raise_error(FrozenError) + -> { "hello".freeze.slice!(10, 3) }.should raise_error(FrozenError) + -> { "hello".freeze.slice!(-10, 3)}.should raise_error(FrozenError) + -> { "hello".freeze.slice!(4, -3) }.should raise_error(FrozenError) + -> { "hello".freeze.slice!(10, 3) }.should raise_error(FrozenError) + -> { "hello".freeze.slice!(-10, 3)}.should raise_error(FrozenError) + -> { "hello".freeze.slice!(4, -3) }.should raise_error(FrozenError) + end + + it "calls to_int on idx and length" do + "hello".slice!(0.5, 2.5).should == "he" + + obj = mock('2') + def obj.to_int() 2 end + "hello".slice!(obj, obj).should == "ll" + + obj = mock('2') + def obj.respond_to?(name, *) name == :to_int; end + def obj.method_missing(name, *) name == :to_int ? 
2 : super; end + "hello".slice!(obj, obj).should == "ll" + end + + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.slice!(0, 0).should be_an_instance_of(String) + s.slice!(0, 4).should be_an_instance_of(String) + end + + it "returns the substring given by the character offsets" do + "hellö there".slice!(1,0).should == "" + "hellö there".slice!(1,3).should == "ell" + "hellö there".slice!(1,6).should == "ellö t" + "hellö there".slice!(1,9).should == "ellö ther" + end + + it "treats invalid bytes as single bytes" do + xE6xCB = [0xE6,0xCB].pack('CC').force_encoding('utf-8') + "a#{xE6xCB}b".slice!(1, 2).should == xE6xCB + end +end + +describe "String#slice! Range" do + it "deletes and return the substring given by the offsets of the range" do + a = "hello" + a.slice!(1..3).should == "ell" + a.should == "ho" + a.slice!(0..0).should == "h" + a.should == "o" + a.slice!(0...0).should == "" + a.should == "o" + + # Edge Case? + "hello".slice!(-3..-9).should == "" + end + + it "returns nil if the given range is out of self" do + a = "hello" + a.slice!(-6..-9).should == nil + a.should == "hello" + + b = "hello" + b.slice!(10..20).should == nil + b.should == "hello" + end + + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.slice!(0...0).should be_an_instance_of(String) + s.slice!(0..4).should be_an_instance_of(String) + end + + it "calls to_int on range arguments" do + from = mock('from') + to = mock('to') + + # So we can construct a range out of them... + def from.<=>(o) 0 end + def to.<=>(o) 0 end + + def from.to_int() 1 end + def to.to_int() -2 end + + "hello there".slice!(from..to).should == "ello ther" + + from = mock('from') + to = mock('to') + + def from.<=>(o) 0 end + def to.<=>(o) 0 end + + def from.respond_to?(name, *) name == :to_int; end + def from.method_missing(name) name == :to_int ? 1 : super; end + def to.respond_to?(name, *) name == :to_int; end + def to.method_missing(name) name == :to_int ? 
-2 : super; end + + "hello there".slice!(from..to).should == "ello ther" + end + + it "works with Range subclasses" do + a = "GOOD" + range_incl = StringSpecs::MyRange.new(1, 2) + + a.slice!(range_incl).should == "OO" + end + + + it "returns the substring given by the character offsets of the range" do + "hellö there".slice!(1..1).should == "e" + "hellö there".slice!(1..3).should == "ell" + "hellö there".slice!(1...3).should == "el" + "hellö there".slice!(-4..-2).should == "her" + "hellö there".slice!(-4...-2).should == "he" + "hellö there".slice!(5..-1).should == " there" + "hellö there".slice!(5...-1).should == " ther" + end + + + it "raises a FrozenError on a frozen instance that is modified" do + -> { "hello".freeze.slice!(1..3) }.should raise_error(FrozenError) + end + + # see redmine #1551 + it "raises a FrozenError on a frozen instance that would not be modified" do + -> { "hello".freeze.slice!(10..20)}.should raise_error(FrozenError) + end +end + +describe "String#slice! with Regexp" do + it "deletes and returns the first match from self" do + s = "this is a string" + s.slice!(/s.*t/).should == 's is a st' + s.should == 'thiring' + + c = "hello hello" + c.slice!(/llo/).should == "llo" + c.should == "he hello" + end + + it "returns nil if there was no match" do + s = "this is a string" + s.slice!(/zzz/).should == nil + s.should == "this is a string" + end + + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.slice!(//).should be_an_instance_of(String) + s.slice!(/../).should be_an_instance_of(String) + end + + it "returns the matching portion of self with a multi byte character" do + "hëllo there".slice!(/[ë](.)\1/).should == "ëll" + "".slice!(//).should == "" + end + + it "sets $~ to MatchData when there is a match and nil when there's none" do + 'hello'.slice!(/./) + $~[0].should == 'h' + + 'hello'.slice!(/not/) + $~.should == nil + end + + it "raises a FrozenError on a frozen instance that is modified" do + -> { "this is a 
string".freeze.slice!(/s.*t/) }.should raise_error(FrozenError) + end + + it "raises a FrozenError on a frozen instance that would not be modified" do + -> { "this is a string".freeze.slice!(/zzz/) }.should raise_error(FrozenError) + end +end + +describe "String#slice! with Regexp, index" do + it "deletes and returns the capture for idx from self" do + str = "hello there" + str.slice!(/[aeiou](.)\1/, 0).should == "ell" + str.should == "ho there" + str.slice!(/(t)h/, 1).should == "t" + str.should == "ho here" + end + + it "returns nil if there was no match" do + s = "this is a string" + s.slice!(/x(zzz)/, 1).should == nil + s.should == "this is a string" + end + + it "returns nil if there is no capture for idx" do + "hello there".slice!(/[aeiou](.)\1/, 2).should == nil + # You can't refer to 0 using negative indices + "hello there".slice!(/[aeiou](.)\1/, -2).should == nil + end + + it "accepts a Float for capture index" do + "har".slice!(/(.)(.)(.)/, 1.5).should == "h" + end + + it "calls #to_int to convert an Object to capture index" do + obj = mock('2') + obj.should_receive(:to_int).at_least(1).times.and_return(2) + + "har".slice!(/(.)(.)(.)/, obj).should == "a" + end + + it "returns String instances" do + s = StringSpecs::MyString.new("hello") + s.slice!(/(.)(.)/, 0).should be_an_instance_of(String) + s.slice!(/(.)(.)/, 1).should be_an_instance_of(String) + end + + it "returns the encoding aware capture for the given index" do + "hår".slice!(/(.)(.)(.)/, 0).should == "hår" + "hår".slice!(/(.)(.)(.)/, 1).should == "h" + "hår".slice!(/(.)(.)(.)/, 2).should == "å" + "hår".slice!(/(.)(.)(.)/, 3).should == "r" + "hår".slice!(/(.)(.)(.)/, -1).should == "r" + "hår".slice!(/(.)(.)(.)/, -2).should == "å" + "hår".slice!(/(.)(.)(.)/, -3).should == "h" + end + + it "sets $~ to MatchData when there is a match and nil when there's none" do + 'hello'[/.(.)/, 0] + $~[0].should == 'he' + + 'hello'[/.(.)/, 1] + $~[1].should == 'e' + + 'hello'[/not/, 0] + $~.should == nil + end + + 
it "raises a FrozenError if self is frozen" do + -> { "this is a string".freeze.slice!(/s.*t/) }.should raise_error(FrozenError) + -> { "this is a string".freeze.slice!(/zzz/, 0)}.should raise_error(FrozenError) + -> { "this is a string".freeze.slice!(/(.)/, 2)}.should raise_error(FrozenError) + end +end + +describe "String#slice! with String" do + it "removes and returns the first occurrence of other_str from self" do + c = "hello hello" + c.slice!('llo').should == "llo" + c.should == "he hello" + end + + it "doesn't set $~" do + $~ = nil + + 'hello'.slice!('ll') + $~.should == nil + end + + it "returns nil if self does not contain other" do + a = "hello" + a.slice!('zzz').should == nil + a.should == "hello" + end + + it "doesn't call to_str on its argument" do + o = mock('x') + o.should_not_receive(:to_str) + + -> { "hello".slice!(o) }.should raise_error(TypeError) + end + + it "returns a subclass instance when given a subclass instance" do + s = StringSpecs::MyString.new("el") + r = "hello".slice!(s) + r.should == "el" + r.should be_an_instance_of(String) + end + + it "raises a FrozenError if self is frozen" do + -> { "hello hello".freeze.slice!('llo') }.should raise_error(FrozenError) + -> { "this is a string".freeze.slice!('zzz')}.should raise_error(FrozenError) + -> { "this is a string".freeze.slice!('zzz')}.should raise_error(FrozenError) + end +end diff --git a/spec/ruby/core/string/split_spec.rb b/spec/ruby/core/string/split_spec.rb new file mode 100644 index 0000000000..3c6d1864d1 --- /dev/null +++ b/spec/ruby/core/string/split_spec.rb @@ -0,0 +1,546 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#split with String" do + it "throws an ArgumentError if the string is not a valid" do + s = "\xDF".dup.force_encoding(Encoding::UTF_8) + + -> { s.split }.should raise_error(ArgumentError) + -> { s.split(':') }.should raise_error(ArgumentError) + end + + it "throws an ArgumentError if the 
pattern is not a valid string" do + str = 'проверка' + broken_str = "\xDF".dup.force_encoding(Encoding::UTF_8) + + -> { str.split(broken_str) }.should raise_error(ArgumentError) + end + + it "splits on multibyte characters" do + "ありがりがとう".split("が").should == ["あり", "り", "とう"] + end + + it "returns an array of substrings based on splitting on the given string" do + "mellow yellow".split("ello").should == ["m", "w y", "w"] + end + + it "suppresses trailing empty fields when limit isn't given or 0" do + "1,2,,3,4,,".split(',').should == ["1", "2", "", "3", "4"] + "1,2,,3,4,,".split(',', 0).should == ["1", "2", "", "3", "4"] + " a b c\nd ".split(" ").should == ["", "a", "b", "c\nd"] + " a あ c\nd ".split(" ").should == ["", "a", "あ", "c\nd"] + "hai".split("hai").should == [] + ",".split(",").should == [] + ",".split(",", 0).should == [] + "あ".split("あ").should == [] + "あ".split("あ", 0).should == [] + end + + it "does not suppress trailing empty fields when a positive limit is given" do + " 1 2 ".split(" ", 2).should == ["1", "2 "] + " 1 2 ".split(" ", 3).should == ["1", "2", ""] + " 1 2 ".split(" ", 4).should == ["1", "2", ""] + " 1 あ ".split(" ", 2).should == ["1", "あ "] + " 1 あ ".split(" ", 3).should == ["1", "あ", ""] + " 1 あ ".split(" ", 4).should == ["1", "あ", ""] + + "1,2,".split(',', 2).should == ["1", "2,"] + "1,2,".split(',', 3).should == ["1", "2", ""] + "1,2,".split(',', 4).should == ["1", "2", ""] + "1,あ,".split(',', 2).should == ["1", "あ,"] + "1,あ,".split(',', 3).should == ["1", "あ", ""] + "1,あ,".split(',', 4).should == ["1", "あ", ""] + + "1 2 ".split(/ /, 2).should == ["1", "2 "] + "1 2 ".split(/ /, 3).should == ["1", "2", ""] + "1 2 ".split(/ /, 4).should == ["1", "2", ""] + "1 あ ".split(/ /, 2).should == ["1", "あ "] + "1 あ ".split(/ /, 3).should == ["1", "あ", ""] + "1 あ ".split(/ /, 4).should == ["1", "あ", ""] + end + + it "returns an array with one entry if limit is 1: the original string" do + "hai".split("hai", 1).should == ["hai"] + 
"x.y.z".split(".", 1).should == ["x.y.z"] + "hello world ".split(" ", 1).should == ["hello world "] + "hi!".split("", 1).should == ["hi!"] + end + + it "returns at most limit fields when limit > 1" do + "hai".split("hai", 2).should == ["", ""] + + "1,2".split(",", 3).should == ["1", "2"] + + "1,2,,3,4,,".split(',', 2).should == ["1", "2,,3,4,,"] + "1,2,,3,4,,".split(',', 3).should == ["1", "2", ",3,4,,"] + "1,2,,3,4,,".split(',', 4).should == ["1", "2", "", "3,4,,"] + "1,2,,3,4,,".split(',', 5).should == ["1", "2", "", "3", "4,,"] + "1,2,,3,4,,".split(',', 6).should == ["1", "2", "", "3", "4", ","] + + "x".split('x', 2).should == ["", ""] + "xx".split('x', 2).should == ["", "x"] + "xx".split('x', 3).should == ["", "", ""] + "xxx".split('x', 2).should == ["", "xx"] + "xxx".split('x', 3).should == ["", "", "x"] + "xxx".split('x', 4).should == ["", "", "", ""] + end + + it "doesn't suppress or limit fields when limit is negative" do + "1,2,,3,4,,".split(',', -1).should == ["1", "2", "", "3", "4", "", ""] + "1,2,,3,4,,".split(',', -5).should == ["1", "2", "", "3", "4", "", ""] + " a b c\nd ".split(" ", -1).should == ["", "a", "b", "c\nd", ""] + ",".split(",", -1).should == ["", ""] + end + + it "raises a RangeError when the limit is larger than int" do + -> { "a,b".split(" ", 2147483649) }.should raise_error(RangeError) + end + + it "defaults to $; when string isn't given or nil" do + suppress_warning do + old_fs = $; + begin + [",", ":", "", "XY", nil].each do |fs| + $; = fs + + ["x,y,z,,,", "1:2:", "aXYbXYcXY", ""].each do |str| + expected = str.split(fs || " ") + + str.split(nil).should == expected + str.split.should == expected + + str.split(nil, -1).should == str.split(fs || " ", -1) + str.split(nil, 0).should == str.split(fs || " ", 0) + str.split(nil, 2).should == str.split(fs || " ", 2) + end + end + ensure + $; = old_fs + end + end + + context "when $; is not nil" do + before do + suppress_warning do + @old_value, $; = $;, 'foobar' + end + end + + after do + 
$; = @old_value + end + + it "warns" do + -> { "".split }.should complain(/warning: \$; is set to non-nil value/) + end + end + end + + it "ignores leading and continuous whitespace when string is a single space" do + " now's the time ".split(' ').should == ["now's", "the", "time"] + " now's the time ".split(' ', -1).should == ["now's", "the", "time", ""] + " now's the time ".split(' ', 3).should == ["now's", "the", "time "] + + "\t\n a\t\tb \n\r\r\nc\v\vd\v ".split(' ').should == ["a", "b", "c", "d"] + "a\x00a b".split(' ').should == ["a\x00a", "b"] + end + + describe "when limit is zero" do + it "ignores leading and continuous whitespace when string is a single space" do + " now's the time ".split(' ', 0).should == ["now's", "the", "time"] + end + end + + it "splits between characters when its argument is an empty string" do + "hi!".split("").should == ["h", "i", "!"] + "hi!".split("", -1).should == ["h", "i", "!", ""] + "hi!".split("", 0).should == ["h", "i", "!"] + "hi!".split("", 1).should == ["hi!"] + "hi!".split("", 2).should == ["h", "i!"] + "hi!".split("", 3).should == ["h", "i", "!"] + "hi!".split("", 4).should == ["h", "i", "!", ""] + "hi!".split("", 5).should == ["h", "i", "!", ""] + end + + it "tries converting its pattern argument to a string via to_str" do + obj = mock('::') + obj.should_receive(:to_str).and_return("::") + + "hello::world".split(obj).should == ["hello", "world"] + end + + it "tries converting limit to an integer via to_int" do + obj = mock('2') + obj.should_receive(:to_int).and_return(2) + + "1.2.3.4".split(".", obj).should == ["1", "2.3.4"] + end + + it "doesn't set $~" do + $~ = nil + "x.y.z".split(".") + $~.should == nil + end + + it "returns the original string if no matches are found" do + "foo".split("bar").should == ["foo"] + "foo".split("bar", -1).should == ["foo"] + "foo".split("bar", 0).should == ["foo"] + "foo".split("bar", 1).should == ["foo"] + "foo".split("bar", 2).should == ["foo"] + "foo".split("bar", 3).should == 
["foo"] + end + + it "returns String instances based on self" do + ["", "x.y.z.", " x y "].each do |str| + ["", ".", " "].each do |pat| + [-1, 0, 1, 2].each do |limit| + StringSpecs::MyString.new(str).split(pat, limit).each do |x| + x.should be_an_instance_of(String) + end + + str.split(StringSpecs::MyString.new(pat), limit).each do |x| + x.should be_an_instance_of(String) + end + end + end + end + end + + it "returns an empty array when whitespace is split on whitespace" do + " ".split(" ").should == [] + " \n ".split(" ").should == [] + " ".split(" ").should == [] + " \t ".split(" ").should == [] + end + + it "doesn't split on non-ascii whitespace" do + "a\u{2008}b".split(" ").should == ["a\u{2008}b"] + end + + it "returns Strings in the same encoding as self" do + strings = "hello world".encode("US-ASCII").split(" ") + + strings[0].encoding.should == Encoding::US_ASCII + strings[1].encoding.should == Encoding::US_ASCII + end +end + +describe "String#split with Regexp" do + it "throws an ArgumentError if the string is not a valid" do + s = "\xDF".dup.force_encoding(Encoding::UTF_8) + + -> { s.split(/./) }.should raise_error(ArgumentError) + end + + it "divides self on regexp matches" do + " now's the time".split(/ /).should == ["", "now's", "", "the", "time"] + " x\ny ".split(/ /).should == ["", "x\ny"] + "1, 2.34,56, 7".split(/,\s*/).should == ["1", "2.34", "56", "7"] + "1x2X3".split(/x/i).should == ["1", "2", "3"] + end + + it "treats negative limits as no limit" do + "".split(%r!/+!, -1).should == [] + end + + it "suppresses trailing empty fields when limit isn't given or 0" do + "1,2,,3,4,,".split(/,/).should == ["1", "2", "", "3", "4"] + "1,2,,3,4,,".split(/,/, 0).should == ["1", "2", "", "3", "4"] + " a b c\nd ".split(/\s+/).should == ["", "a", "b", "c", "d"] + "hai".split(/hai/).should == [] + ",".split(/,/).should == [] + ",".split(/,/, 0).should == [] + end + + it "returns an array with one entry if limit is 1: the original string" do + 
"hai".split(/hai/, 1).should == ["hai"] + "xAyBzC".split(/[A-Z]/, 1).should == ["xAyBzC"] + "hello world ".split(/\s+/, 1).should == ["hello world "] + "hi!".split(//, 1).should == ["hi!"] + end + + it "returns at most limit fields when limit > 1" do + "hai".split(/hai/, 2).should == ["", ""] + + "1,2".split(/,/, 3).should == ["1", "2"] + + "1,2,,3,4,,".split(/,/, 2).should == ["1", "2,,3,4,,"] + "1,2,,3,4,,".split(/,/, 3).should == ["1", "2", ",3,4,,"] + "1,2,,3,4,,".split(/,/, 4).should == ["1", "2", "", "3,4,,"] + "1,2,,3,4,,".split(/,/, 5).should == ["1", "2", "", "3", "4,,"] + "1,2,,3,4,,".split(/,/, 6).should == ["1", "2", "", "3", "4", ","] + + "x".split(/x/, 2).should == ["", ""] + "xx".split(/x/, 2).should == ["", "x"] + "xx".split(/x/, 3).should == ["", "", ""] + "xxx".split(/x/, 2).should == ["", "xx"] + "xxx".split(/x/, 3).should == ["", "", "x"] + "xxx".split(/x/, 4).should == ["", "", "", ""] + end + + it "doesn't suppress or limit fields when limit is negative" do + "1,2,,3,4,,".split(/,/, -1).should == ["1", "2", "", "3", "4", "", ""] + "1,2,,3,4,,".split(/,/, -5).should == ["1", "2", "", "3", "4", "", ""] + " a b c\nd ".split(/\s+/, -1).should == ["", "a", "b", "c", "d", ""] + ",".split(/,/, -1).should == ["", ""] + end + + it "defaults to $; when regexp isn't given or nil" do + suppress_warning do + old_fs = $; + begin + [/,/, /:/, //, /XY/, /./].each do |fs| + $; = fs + + ["x,y,z,,,", "1:2:", "aXYbXYcXY", ""].each do |str| + expected = str.split(fs) + + str.split(nil).should == expected + str.split.should == expected + + str.split(nil, -1).should == str.split(fs, -1) + str.split(nil, 0).should == str.split(fs, 0) + str.split(nil, 2).should == str.split(fs, 2) + end + end + ensure + $; = old_fs + end + end + end + + it "splits between characters when regexp matches a zero-length string" do + "hello".split(//).should == ["h", "e", "l", "l", "o"] + "hello".split(//, -1).should == ["h", "e", "l", "l", "o", ""] + "hello".split(//, 0).should == ["h", 
"e", "l", "l", "o"] + "hello".split(//, 1).should == ["hello"] + "hello".split(//, 2).should == ["h", "ello"] + "hello".split(//, 5).should == ["h", "e", "l", "l", "o"] + "hello".split(//, 6).should == ["h", "e", "l", "l", "o", ""] + "hello".split(//, 7).should == ["h", "e", "l", "l", "o", ""] + + "hi mom".split(/\s*/).should == ["h", "i", "m", "o", "m"] + + "AABCCBAA".split(/(?=B)/).should == ["AA", "BCC", "BAA"] + "AABCCBAA".split(/(?=B)/, -1).should == ["AA", "BCC", "BAA"] + "AABCCBAA".split(/(?=B)/, 2).should == ["AA", "BCCBAA"] + end + + it "respects unicode when splitting between characters" do + str = "こにちわ" + reg = %r!! + ary = str.split(reg) + ary.size.should == 4 + ary.should == ["こ", "に", "ち", "わ"] + end + + it "respects the encoding of the regexp when splitting between characters" do + str = "\303\202" + ary = str.split(//u) + ary.size.should == 1 + ary.should == ["\303\202"] + end + + it "includes all captures in the result array" do + "hello".split(/(el)/).should == ["h", "el", "lo"] + "hi!".split(/()/).should == ["h", "", "i", "", "!"] + "hi!".split(/()/, -1).should == ["h", "", "i", "", "!", "", ""] + "hello".split(/((el))()/).should == ["h", "el", "el", "", "lo"] + "AabB".split(/([a-z])+/).should == ["A", "b", "B"] + end + + it "applies the limit to the number of split substrings, without counting captures" do + "aBaBa".split(/(B)()()/, 2).should == ["a", "B", "", "", "aBa"] + end + + it "does not include non-matching captures in the result array" do + "hello".split(/(el)|(xx)/).should == ["h", "el", "lo"] + end + + it "tries converting limit to an integer via to_int" do + obj = mock('2') + obj.should_receive(:to_int).and_return(2) + + "1.2.3.4".split(/\./, obj).should == ["1", "2.3.4"] # Regexp pattern: the String-pattern case lives in "String#split with String" + end + + it "raises a TypeError if limit can't be converted to an integer" do + -> {"1.2.3.4".split(/\./, "three")}.should raise_error(TypeError) + -> {"1.2.3.4".split(/\./, nil) }.should raise_error(TypeError) + end + + it "doesn't set $~" do + $~ = nil + 
"x:y:z".split(/:/) + $~.should == nil + end + + it "returns the original string if no matches are found" do + "foo".split(/bar/).should == ["foo"] + "foo".split(/bar/, -1).should == ["foo"] + "foo".split(/bar/, 0).should == ["foo"] + "foo".split(/bar/, 1).should == ["foo"] + "foo".split(/bar/, 2).should == ["foo"] + "foo".split(/bar/, 3).should == ["foo"] + end + + it "returns String instances based on self" do + ["", "x:y:z:", " x y "].each do |str| + [//, /:/, /\s+/].each do |pat| + [-1, 0, 1, 2].each do |limit| + StringSpecs::MyString.new(str).split(pat, limit).each do |x| + x.should be_an_instance_of(String) + end + end + end + end + end + + it "returns Strings in the same encoding as self" do + ary = "а б в".split + encodings = ary.map { |s| s.encoding } + encodings.should == [Encoding::UTF_8, Encoding::UTF_8, Encoding::UTF_8] + end + + it "splits a string on each character for a multibyte encoding and empty split" do + "That's why e\uFB03ciency could not be helped".split("").size.should == 39 # \uFB03 is the one-character "ffi" ligature; spelled as an escape so text normalization cannot turn it into three ASCII letters + end + + it "raises an ArgumentError if an invalid UTF-8 string is supplied" do + broken_str = +'проверка' # in russian, means "test" + broken_str.force_encoding('binary') + broken_str.chop! 
+ broken_str.force_encoding('utf-8') + ->{ broken_str.split(/\r\n|\r|\n/) }.should raise_error(ArgumentError) + end + + # See https://bugs.ruby-lang.org/issues/12689 and https://github.com/jruby/jruby/issues/4868 + it "allows concurrent Regexp calls in a shared context" do + str = 'a,b,c,d,e' + + p = proc { str.split(/,/) } + results = 10.times.map { Thread.new { x = nil; 100.times { x = p.call }; x } }.map(&:value) + + results.should == [%w[a b c d e]] * 10 + end + + context "when a block is given" do + it "yields each split substring with default pattern" do + a = [] + returned_object = "chunky bacon".split { |str| a << str.capitalize } + + returned_object.should == "chunky bacon" + a.should == ["Chunky", "Bacon"] + end + + it "yields each split substring with default pattern for a lazy substring" do + a = [] + returned_object = "chunky bacon"[1...-1].split { |str| a << str.capitalize } + + returned_object.should == "hunky baco" + a.should == ["Hunky", "Baco"] + end + + it "yields each split substring with default pattern for a non-ASCII string" do + a = [] + returned_object = "l'été arrive bientôt".split { |str| a << str } + + returned_object.should == "l'été arrive bientôt" + a.should == ["l'été", "arrive", "bientôt"] + end + + it "yields each split substring with default pattern for a non-ASCII lazy substring" do + a = [] + returned_object = "l'été arrive bientôt"[1...-1].split { |str| a << str } + + returned_object.should == "'été arrive bientô" + a.should == ["'été", "arrive", "bientô"] + end + + it "yields the string when limit is 1" do + a = [] + returned_object = "chunky bacon".split("", 1) { |str| a << str.capitalize } + + returned_object.should == "chunky bacon" + a.should == ["Chunky bacon"] + end + + it "yields each split letter" do + a = [] + returned_object = "chunky".split("", 0) { |str| a << str.capitalize } + + returned_object.should == "chunky" + a.should == %w(C H U N K Y) + end + + it "yields each split substring with a pattern" do + a = [] + 
returned_object = "chunky-bacon".split("-", 0) { |str| a << str.capitalize } + + returned_object.should == "chunky-bacon" + a.should == ["Chunky", "Bacon"] + end + + it "yields each split substring with empty regexp pattern" do + a = [] + returned_object = "chunky".split(//) { |str| a << str.capitalize } + + returned_object.should == "chunky" + a.should == %w(C H U N K Y) + end + + it "yields each split substring with empty regexp pattern and limit" do + a = [] + returned_object = "chunky".split(//, 3) { |str| a << str.capitalize } + + returned_object.should == "chunky" + a.should == %w(C H Unky) + end + + it "yields each split substring with a regexp pattern" do + a = [] + returned_object = "chunky:bacon".split(/:/) { |str| a << str.capitalize } + + returned_object.should == "chunky:bacon" + a.should == ["Chunky", "Bacon"] + end + + it "returns a string as is (and doesn't call block) if it is empty" do + a = [] + returned_object = "".split { |str| a << str.capitalize } + + returned_object.should == "" + a.should == [] + end + end + + describe "for a String subclass" do + it "yields instances of String" do + a = [] + StringSpecs::MyString.new("a|b").split("|") { |str| a << str } + first, last = a + + first.should be_an_instance_of(String) + first.should == "a" + + last.should be_an_instance_of(String) + last.should == "b" + end + end + + it "raises a TypeError when not called with nil, String, or Regexp" do + -> { "hello".split(42) }.should raise_error(TypeError) + -> { "hello".split(:ll) }.should raise_error(TypeError) + -> { "hello".split(false) }.should raise_error(TypeError) + -> { "hello".split(Object.new) }.should raise_error(TypeError) + end + + it "returns Strings in the same encoding as self" do + strings = "hello world".encode("US-ASCII").split(/ /) + + strings[0].encoding.should == Encoding::US_ASCII + strings[1].encoding.should == Encoding::US_ASCII + end +end diff --git a/spec/ruby/core/string/squeeze_spec.rb b/spec/ruby/core/string/squeeze_spec.rb new 
file mode 100644 index 0000000000..981d480684 --- /dev/null +++ b/spec/ruby/core/string/squeeze_spec.rb @@ -0,0 +1,111 @@ +# encoding: binary +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +# TODO: rewrite all these specs + +describe "String#squeeze" do + it "returns new string where runs of the same character are replaced by a single character when no args are given" do + "yellow moon".squeeze.should == "yelow mon" + end + + it "only squeezes chars that are in the intersection of all sets given" do + "woot squeeze cheese".squeeze("eost", "queo").should == "wot squeze chese" + " now is the".squeeze(" ").should == " now is the" + end + + it "negates sets starting with ^" do + s = "<<subbookkeeper!!!>>" + s.squeeze("beko", "^e").should == s.squeeze("bko") + s.squeeze("^<bek!>").should == s.squeeze("o") + s.squeeze("^o").should == s.squeeze("<bek!>") + s.squeeze("^").should == s + "^__^".squeeze("^^").should == "^_^" + "((^^__^^))".squeeze("_^").should == "((^_^))" + end + + it "squeezes all chars in a sequence" do + s = "--subbookkeeper--" + s.squeeze("\x00-\xFF").should == s.squeeze + s.squeeze("bk-o").should == s.squeeze("bklmno") + s.squeeze("b-e").should == s.squeeze("bcde") + s.squeeze("e-").should == "-subbookkeper-" + s.squeeze("-e").should == "-subbookkeper-" + s.squeeze("---").should == "-subbookkeeper-" + "ook--001122".squeeze("--2").should == "ook-012" + "ook--(())".squeeze("(--").should == "ook-()" + s.squeeze("^b-e").should == "-subbokeeper-" + "^^__^^".squeeze("^^-^").should == "^^_^^" + "^^--^^".squeeze("^---").should == "^--^" + + s.squeeze("b-dk-o-").should == "-subokeeper-" + s.squeeze("-b-dk-o").should == "-subokeeper-" + s.squeeze("b-d-k-o").should == "-subokeeper-" + + s.squeeze("bc-e").should == "--subookkeper--" + s.squeeze("^bc-e").should == "-subbokeeper-" + + "AABBCCaabbcc[[]]".squeeze("A-a").should == "ABCabbcc[]" + end + + it "raises an ArgumentError when the parameter is out of 
sequence" do + s = "--subbookkeeper--" + -> { s.squeeze("e-b") }.should raise_error(ArgumentError) + -> { s.squeeze("^e-b") }.should raise_error(ArgumentError) + end + + it "tries to convert each set arg to a string using to_str" do + other_string = mock('lo') + other_string.should_receive(:to_str).and_return("lo") + + other_string2 = mock('o') + other_string2.should_receive(:to_str).and_return("o") + + "hello room".squeeze(other_string, other_string2).should == "hello rom" + end + + it "returns a String in the same encoding as self" do + "yellow moon".encode("US-ASCII").squeeze.encoding.should == Encoding::US_ASCII + "yellow moon".encode("US-ASCII").squeeze("a").encoding.should == Encoding::US_ASCII + end + + it "raises a TypeError when one set arg can't be converted to a string" do + -> { "hello world".squeeze([]) }.should raise_error(TypeError) + -> { "hello world".squeeze(Object.new)}.should raise_error(TypeError) + -> { "hello world".squeeze(mock('x')) }.should raise_error(TypeError) + end + + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("oh no!!!").squeeze("!").should be_an_instance_of(String) + end +end + +describe "String#squeeze!" do + it "modifies self in place and returns self" do + a = "yellow moon" + a.squeeze!.should equal(a) + a.should == "yelow mon" + end + + it "returns nil if no modifications were made" do + a = "squeeze" + a.squeeze!("u", "sq").should == nil + a.squeeze!("q").should == nil + a.should == "squeeze" + end + + it "raises an ArgumentError when the parameter is out of sequence" do + s = "--subbookkeeper--" + -> { s.squeeze!("e-b") }.should raise_error(ArgumentError) + -> { s.squeeze!("^e-b") }.should raise_error(ArgumentError) + end + + it "raises a FrozenError when self is frozen" do + a = "yellow moon" + a.freeze + + -> { a.squeeze!("") }.should raise_error(FrozenError) + -> { a.squeeze! 
}.should raise_error(FrozenError) + end +end diff --git a/spec/ruby/core/string/start_with_spec.rb b/spec/ruby/core/string/start_with_spec.rb new file mode 100644 index 0000000000..35e33b46a6 --- /dev/null +++ b/spec/ruby/core/string/start_with_spec.rb @@ -0,0 +1,27 @@ +# -*- encoding: utf-8 -*- +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative '../../shared/string/start_with' + +describe "String#start_with?" do + it_behaves_like :start_with, :to_s + + # Here and not in the shared examples because this is invalid as a Symbol + it "matches part of a character with the same part" do + "\xA9".should.start_with?("\xA9") # A9 is not a character head for UTF-8 + end + + ruby_version_is ""..."3.3" do + it "does not check we are matching only part of a character" do + "\xe3\x81\x82".size.should == 1 + "\xe3\x81\x82".should.start_with?("\xe3") + end + end + + ruby_version_is "3.3" do # #19784 + it "checks we are matching only part of a character" do + "\xe3\x81\x82".size.should == 1 + "\xe3\x81\x82".should_not.start_with?("\xe3") + end + end +end diff --git a/spec/ruby/core/string/string_spec.rb b/spec/ruby/core/string/string_spec.rb new file mode 100644 index 0000000000..cdefbbecbd --- /dev/null +++ b/spec/ruby/core/string/string_spec.rb @@ -0,0 +1,7 @@ +require_relative '../../spec_helper' + +describe "String" do + it "includes Comparable" do + String.include?(Comparable).should == true + end +end diff --git a/spec/ruby/core/string/strip_spec.rb b/spec/ruby/core/string/strip_spec.rb new file mode 100644 index 0000000000..edb6ea3b44 --- /dev/null +++ b/spec/ruby/core/string/strip_spec.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/strip' + +describe "String#strip" do + it_behaves_like :string_strip, :strip + + it "returns a new string with leading and trailing whitespace removed" do + " hello ".strip.should == "hello" + " 
hello world ".strip.should == "hello world" + "\tgoodbye\r\v\n".strip.should == "goodbye" + end + + it "returns a copy of self without leading and trailing NULL bytes and whitespace" do + " \x00 goodbye \x00 ".strip.should == "goodbye" + end +end + +describe "String#strip!" do + it "modifies self in place and returns self" do + a = " hello " + a.strip!.should equal(a) + a.should == "hello" + + a = "\tgoodbye\r\v\n" + a.strip! + a.should == "goodbye" + end + + it "returns nil if no modifications were made" do + a = "hello" + a.strip!.should == nil + a.should == "hello" + end + + it "makes a string empty if it is only whitespace" do + "".strip!.should == nil + " ".strip!.should == "" # strip! returns the (now empty) receiver when it changed something + " \t\n ".strip!.should == "" + end + + it "removes leading and trailing NULL bytes and whitespace" do + a = "\000 goodbye \000" + a.strip! + a.should == "goodbye" + end + + it "raises a FrozenError on a frozen instance that is modified" do + -> { " hello ".freeze.strip! }.should raise_error(FrozenError) + end + + # see #1552 + it "raises a FrozenError on a frozen instance that would not be modified" do + -> {"hello".freeze.strip! }.should raise_error(FrozenError) + -> {"".freeze.strip! 
}.should raise_error(FrozenError) + end +end diff --git a/spec/ruby/core/string/sub_spec.rb b/spec/ruby/core/string/sub_spec.rb new file mode 100644 index 0000000000..6ff28ec851 --- /dev/null +++ b/spec/ruby/core/string/sub_spec.rb @@ -0,0 +1,512 @@ +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#sub with pattern, replacement" do + it "returns a copy of self when no modification is made" do + a = "hello" + b = a.sub(/w.*$/, "*") + + b.should_not equal(a) + b.should == "hello" + end + + it "returns a copy of self with all occurrences of pattern replaced with replacement" do + "hello".sub(/[aeiou]/, '*').should == "h*llo" + "hello".sub(//, ".").should == ".hello" + end + + it "ignores a block if supplied" do + "food".sub(/f/, "g") { "w" }.should == "good" + end + + it "supports \\G which matches at the beginning of the string" do + "hello world!".sub(/\Ghello/, "hi").should == "hi world!" + end + + it "supports /i for ignoring case" do + "Hello".sub(/h/i, "j").should == "jello" + "hello".sub(/H/i, "j").should == "jello" + end + + it "doesn't interpret regexp metacharacters if pattern is a string" do + "12345".sub('\d', 'a').should == "12345" + '\d'.sub('\d', 'a').should == "a" + end + + it "replaces \\1 sequences with the regexp's corresponding capture" do + str = "hello" + + str.sub(/([aeiou])/, '<\1>').should == "h<e>llo" + str.sub(/(.)/, '\1\1').should == "hhello" + + str.sub(/.(.?)/, '<\0>(\1)').should == "<he>(e)llo" + + str.sub(/.(.)+/, '\1').should == "o" + + str = "ABCDEFGHIJKL" + re = /#{"(.)" * 12}/ + str.sub(re, '\1').should == "A" + str.sub(re, '\9').should == "I" + # Only the first 9 captures can be accessed in MRI + str.sub(re, '\10').should == "A0" + end + + it "treats \\1 sequences without corresponding captures as empty strings" do + str = "hello!" + + str.sub("", '<\1>').should == "<>hello!" + str.sub("h", '<\1>').should == "<>ello!" 
+ + str.sub(//, '<\1>').should == "<>hello!" + str.sub(/./, '\1\2\3').should == "ello!" + str.sub(/.(.{20})?/, '\1').should == "ello!" + end + + it "replaces \\& and \\0 with the complete match" do + str = "hello!" + + str.sub("", '<\0>').should == "<>hello!" + str.sub("", '<\&>').should == "<>hello!" + str.sub("he", '<\0>').should == "<he>llo!" + str.sub("he", '<\&>').should == "<he>llo!" + str.sub("l", '<\0>').should == "he<l>lo!" + str.sub("l", '<\&>').should == "he<l>lo!" + + str.sub(//, '<\0>').should == "<>hello!" + str.sub(//, '<\&>').should == "<>hello!" + str.sub(/../, '<\0>').should == "<he>llo!" + str.sub(/../, '<\&>').should == "<he>llo!" + str.sub(/(.)./, '<\0>').should == "<he>llo!" + end + + it "replaces \\` with everything before the current match" do + str = "hello!" + + str.sub("", '<\`>').should == "<>hello!" + str.sub("h", '<\`>').should == "<>ello!" + str.sub("l", '<\`>').should == "he<he>lo!" + str.sub("!", '<\`>').should == "hello<hello>" + + str.sub(//, '<\`>').should == "<>hello!" + str.sub(/..o/, '<\`>').should == "he<he>!" + end + + it "replaces \\' with everything after the current match" do + str = "hello!" + + str.sub("", '<\\\'>').should == "<hello!>hello!" + str.sub("h", '<\\\'>').should == "<ello!>ello!" + str.sub("ll", '<\\\'>').should == "he<o!>o!" + str.sub("!", '<\\\'>').should == "hello<>" + + str.sub(//, '<\\\'>').should == "<hello!>hello!" + str.sub(/../, '<\\\'>').should == "<llo!>llo!" + end + + it "replaces \\\\\\+ with \\\\+" do + "x".sub(/x/, '\\\+').should == "\\+" + end + + it "replaces \\+ with the last paren that actually matched" do + str = "hello!" + + str.sub(/(.)(.)/, '\+').should == "ello!" + str.sub(/(.)(.)+/, '\+').should == "!" + str.sub(/(.)()/, '\+').should == "ello!" + str.sub(/(.)(.{20})?/, '<\+>').should == "<h>ello!" 
+ + str = "ABCDEFGHIJKL" + re = /#{"(.)" * 12}/ + str.sub(re, '\+').should == "L" + end + + it "treats \\+ as an empty string if there was no captures" do + "hello!".sub(/./, '\+').should == "ello!" + end + + it "maps \\\\ in replacement to \\" do + "hello".sub(/./, '\\\\').should == '\\ello' + end + + it "leaves unknown \\x escapes in replacement untouched" do + "hello".sub(/./, '\\x').should == '\\xello' + "hello".sub(/./, '\\y').should == '\\yello' + end + + it "leaves \\ at the end of replacement untouched" do + "hello".sub(/./, 'hah\\').should == 'hah\\ello' + end + + it "tries to convert pattern to a string using to_str" do + pattern = mock('.') + pattern.should_receive(:to_str).and_return(".") + + "hello.".sub(pattern, "!").should == "hello!" + end + + not_supported_on :opal do + it "raises a TypeError when pattern is a Symbol" do + -> { "hello".sub(:woot, "x") }.should raise_error(TypeError) + end + end + + it "raises a TypeError when pattern is an Array" do + -> { "hello".sub([], "x") }.should raise_error(TypeError) + end + + it "raises a TypeError when pattern can't be converted to a string" do + -> { "hello".sub(Object.new, nil) }.should raise_error(TypeError) + end + + it "tries to convert replacement to a string using to_str" do + replacement = mock('hello_replacement') + replacement.should_receive(:to_str).and_return("hello_replacement") + + "hello".sub(/hello/, replacement).should == "hello_replacement" + end + + it "raises a TypeError when replacement can't be converted to a string" do + -> { "hello".sub(/[aeiou]/, []) }.should raise_error(TypeError) + -> { "hello".sub(/[aeiou]/, 99) }.should raise_error(TypeError) + end + + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("").sub(//, "").should be_an_instance_of(String) + StringSpecs::MyString.new("").sub(/foo/, "").should be_an_instance_of(String) + StringSpecs::MyString.new("foo").sub(/foo/, "").should be_an_instance_of(String) + 
StringSpecs::MyString.new("foo").sub("foo", "").should be_an_instance_of(String) + end + + it "sets $~ to MatchData of match and nil when there's none" do + 'hello.'.sub('hello', 'x') + $~[0].should == 'hello' + + 'hello.'.sub('not', 'x') + $~.should == nil + + 'hello.'.sub(/.(.)/, 'x') + $~[0].should == 'he' + + 'hello.'.sub(/not/, 'x') + $~.should == nil + end + + it "replaces \\\\\\1 with \\1" do + "ababa".sub(/(b)/, '\\\1').should == "a\\1aba" + end + + it "replaces \\\\\\\\1 with \\1" do + "ababa".sub(/(b)/, '\\\\1').should == "a\\1aba" + end + + it "replaces \\\\\\\\\\1 with \\" do + "ababa".sub(/(b)/, '\\\\\1').should == "a\\baba" + end + + it "handles a pattern in a superset encoding" do + result = 'abc'.force_encoding(Encoding::US_ASCII).sub('é', 'è') + result.should == 'abc' + result.encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + result = 'été'.sub('t'.force_encoding(Encoding::US_ASCII), 'u') + result.should == 'éué' + result.encoding.should == Encoding::UTF_8 + end +end + +describe "String#sub with pattern and block" do + it "returns a copy of self with the first occurrences of pattern replaced with the block's return value" do + "hi".sub(/./) { |s| s + ' ' }.should == "h i" + "hi!".sub(/(.)(.)/) { |*a| a.inspect }.should == '["hi"]!' 
+ end + + it "sets $~ for access from the block" do + str = "hello" + str.sub(/([aeiou])/) { "<#{$~[1]}>" }.should == "h<e>llo" + str.sub(/([aeiou])/) { "<#{$1}>" }.should == "h<e>llo" + str.sub("l") { "<#{$~[0]}>" }.should == "he<l>lo" + + offsets = [] + + str.sub(/([aeiou])/) do + md = $~ + md.string.should == str + offsets << md.offset(0) + str + end.should == "hhellollo" + + offsets.should == [[1, 2]] + end + + it "sets $~ to MatchData of last match and nil when there's none for access from outside" do + 'hello.'.sub('l') { 'x' } + $~.begin(0).should == 2 + $~[0].should == 'l' + + 'hello.'.sub('not') { 'x' } + $~.should == nil + + 'hello.'.sub(/.(.)/) { 'x' } + $~[0].should == 'he' + + 'hello.'.sub(/not/) { 'x' } + $~.should == nil + end + + it "doesn't raise a RuntimeError if the string is modified while substituting" do + str = "hello" + str.sub(//) { str[0] = 'x' }.should == "xhello" + str.should == "xello" + end + + it "doesn't interpolate special sequences like \\1 for the block's return value" do + repl = '\& \0 \1 \` \\\' \+ \\\\ foo' + "hello".sub(/(.+)/) { repl }.should == repl + end + + it "converts the block's return value to a string using to_s" do + obj = mock('hello_replacement') + obj.should_receive(:to_s).and_return("hello_replacement") + "hello".sub(/hello/) { obj }.should == "hello_replacement" + + obj = mock('ok') + obj.should_receive(:to_s).and_return("ok") + "hello".sub(/.+/) { obj }.should == "ok" + end +end + +describe "String#sub! 
with pattern, replacement" do + it "modifies self in place and returns self" do + a = "hello" + a.sub!(/[aeiou]/, '*').should equal(a) + a.should == "h*llo" + end + + it "returns nil if no modifications were made" do + a = "hello" + a.sub!(/z/, '*').should == nil + a.sub!(/z/, 'z').should == nil + a.should == "hello" + end + + it "raises a FrozenError when self is frozen" do + s = "hello" + s.freeze + + -> { s.sub!(/ROAR/, "x") }.should raise_error(FrozenError) + -> { s.sub!(/e/, "e") }.should raise_error(FrozenError) + -> { s.sub!(/[aeiou]/, '*') }.should raise_error(FrozenError) + end + + it "handles a pattern in a superset encoding" do + string = 'abc'.force_encoding(Encoding::US_ASCII) + + result = string.sub!('é', 'è') + + result.should == nil + string.should == 'abc' + string.encoding.should == Encoding::US_ASCII + end + + it "handles a pattern in a subset encoding" do + string = 'été' + pattern = 't'.force_encoding(Encoding::US_ASCII) + + result = string.sub!(pattern, 'u') + + result.should == string + string.should == 'éué' + string.encoding.should == Encoding::UTF_8 + end +end + +describe "String#sub! 
with pattern and block" do + it "modifies self in place and returns self" do + a = "hello" + a.sub!(/[aeiou]/) { '*' }.should equal(a) + a.should == "h*llo" + end + + it "sets $~ for access from the block" do + str = "hello" + str.dup.sub!(/([aeiou])/) { "<#{$~[1]}>" }.should == "h<e>llo" + str.dup.sub!(/([aeiou])/) { "<#{$1}>" }.should == "h<e>llo" + str.dup.sub!("l") { "<#{$~[0]}>" }.should == "he<l>lo" + + offsets = [] + + str.dup.sub!(/([aeiou])/) do + md = $~ + md.string.should == str + offsets << md.offset(0) + str + end.should == "hhellollo" + + offsets.should == [[1, 2]] + end + + it "returns nil if no modifications were made" do + a = "hello" + a.sub!(/z/) { '*' }.should == nil + a.sub!(/z/) { 'z' }.should == nil + a.should == "hello" + end + + it "raises a RuntimeError if the string is modified while substituting" do + str = "hello" + -> { str.sub!(//) { str << 'x' } }.should raise_error(RuntimeError) + end + + it "raises a FrozenError when self is frozen" do + s = "hello" + s.freeze + + -> { s.sub!(/ROAR/) { "x" } }.should raise_error(FrozenError) + -> { s.sub!(/e/) { "e" } }.should raise_error(FrozenError) + -> { s.sub!(/[aeiou]/) { '*' } }.should raise_error(FrozenError) + end +end + +describe "String#sub with pattern and Hash" do + + it "returns a copy of self with the first occurrence of pattern replaced with the value of the corresponding hash key" do + "hello".sub(/./, 'l' => 'L').should == "ello" + "hello!".sub(/(.)(.)/, 'he' => 'she ', 'll' => 'said').should == 'she llo!' + "hello".sub('l', 'l' => 'el').should == 'heello' + end + + it "removes keys that don't correspond to matches" do + "hello".sub(/./, 'z' => 'b', 'o' => 'ow').should == "ello" + end + + it "ignores non-String keys" do + "tattoo".sub(/(tt)/, 'tt' => 'b', tt: 'z').should == "taboo" + end + + it "uses a key's value only a single time" do + "food".sub(/o/, 'o' => '0').should == "f0od" + end + + it "uses the hash's default value for missing keys" do + hsh = {} + hsh.default='?' 
+ hsh['o'] = '0' + "food".sub(/./, hsh).should == "?ood" + end + + it "coerces the hash values with #to_s" do + hsh = {} + hsh.default=[] + hsh['o'] = 0 + obj = mock('!') + obj.should_receive(:to_s).and_return('!') + hsh['f'] = obj + "food!".sub(/./, hsh).should == "!ood!" + end + + it "uses the hash's value set from default_proc for missing keys" do + hsh = {} + hsh.default_proc = -> k, v { 'lamb' } + "food!".sub(/./, hsh).should == "lambood!" + end + + it "sets $~ to MatchData of first match and nil when there's none for access from outside" do + 'hello.'.sub('l', 'l' => 'L') + $~.begin(0).should == 2 + $~[0].should == 'l' + + 'hello.'.sub('not', 'ot' => 'to') + $~.should == nil + + 'hello.'.sub(/.(.)/, 'o' => ' hole') + $~[0].should == 'he' + + 'hello.'.sub(/not/, 'z' => 'glark') + $~.should == nil + end + + it "doesn't interpolate special sequences like \\1 for the block's return value" do + repl = '\& \0 \1 \` \\\' \+ \\\\ foo' + "hello".sub(/(.+)/, 'hello' => repl ).should == repl + end + +end + +describe "String#sub! with pattern and Hash" do + + it "returns self with the first occurrence of pattern replaced with the value of the corresponding hash key" do + "hello".sub!(/./, 'l' => 'L').should == "ello" + "hello!".sub!(/(.)(.)/, 'he' => 'she ', 'll' => 'said').should == 'she llo!' + "hello".sub!('l', 'l' => 'el').should == 'heello' + end + + it "removes keys that don't correspond to matches" do + "hello".sub!(/./, 'z' => 'b', 'o' => 'ow').should == "ello" + end + + it "ignores non-String keys" do + "hello".sub!(/(ll)/, 'll' => 'r', ll: 'z').should == "hero" + end + + it "uses a key's value only a single time" do + "food".sub!(/o/, 'o' => '0').should == "f0od" + end + + it "uses the hash's default value for missing keys" do + hsh = {} + hsh.default='?' 
+ hsh['o'] = '0' + "food".sub!(/./, hsh).should == "?ood" + end + + it "coerces the hash values with #to_s" do + hsh = {} + hsh.default=[] + hsh['o'] = 0 + obj = mock('!') + obj.should_receive(:to_s).and_return('!') + hsh['f'] = obj + "food!".sub!(/./, hsh).should == "!ood!" + end + + it "uses the hash's value set from default_proc for missing keys" do + hsh = {} + hsh.default_proc = -> k, v { 'lamb' } + "food!".sub!(/./, hsh).should == "lambood!" + end + + it "sets $~ to MatchData of first match and nil when there's none for access from outside" do + 'hello.'.sub!('l', 'l' => 'L') + $~.begin(0).should == 2 + $~[0].should == 'l' + + 'hello.'.sub!('not', 'ot' => 'to') + $~.should == nil + + 'hello.'.sub!(/.(.)/, 'o' => ' hole') + $~[0].should == 'he' + + 'hello.'.sub!(/not/, 'z' => 'glark') + $~.should == nil + end + + it "doesn't interpolate special sequences like \\1 for the block's return value" do + repl = '\& \0 \1 \` \\\' \+ \\\\ foo' + "hello".sub!(/(.+)/, 'hello' => repl ).should == repl + end +end + +describe "String#sub with pattern and without replacement and block" do + it "raises a ArgumentError" do + -> { "abca".sub(/a/) }.should raise_error(ArgumentError) + end +end + +describe "String#sub! with pattern and without replacement and block" do + it "raises a ArgumentError" do + -> { "abca".sub!(/a/) }.should raise_error(ArgumentError) + end +end diff --git a/spec/ruby/core/string/succ_spec.rb b/spec/ruby/core/string/succ_spec.rb new file mode 100644 index 0000000000..65047e0aa2 --- /dev/null +++ b/spec/ruby/core/string/succ_spec.rb @@ -0,0 +1,11 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/succ' + +describe "String#succ" do + it_behaves_like :string_succ, :succ +end + +describe "String#succ!" do + it_behaves_like :string_succ_bang, :"succ!" 
+end diff --git a/spec/ruby/core/string/sum_spec.rb b/spec/ruby/core/string/sum_spec.rb new file mode 100644 index 0000000000..c283b7c254 --- /dev/null +++ b/spec/ruby/core/string/sum_spec.rb @@ -0,0 +1,22 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#sum" do + it "returns a basic n-bit checksum of the characters in self" do + "ruby".sum.should == 450 + "ruby".sum(8).should == 194 + "rubinius".sum(23).should == 881 + end + + it "tries to convert n to an integer using to_int" do + obj = mock('8') + obj.should_receive(:to_int).and_return(8) + + "hello".sum(obj).should == "hello".sum(8) + end + + it "returns sum of the bytes in self if n less or equal to zero" do + "xyz".sum(0).should == 363 + "xyz".sum(-10).should == 363 + end +end diff --git a/spec/ruby/core/string/swapcase_spec.rb b/spec/ruby/core/string/swapcase_spec.rb new file mode 100644 index 0000000000..011a213501 --- /dev/null +++ b/spec/ruby/core/string/swapcase_spec.rb @@ -0,0 +1,193 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#swapcase" do + it "returns a new string with all uppercase chars from self converted to lowercase and vice versa" do + "Hello".swapcase.should == "hELLO" + "cYbEr_PuNk11".swapcase.should == "CyBeR_pUnK11" + "+++---111222???".swapcase.should == "+++---111222???" 
+ end + + it "returns a String in the same encoding as self" do + "Hello".encode("US-ASCII").swapcase.encoding.should == Encoding::US_ASCII + end + + describe "full Unicode case mapping" do + it "works for all of Unicode with no option" do + "äÖü".swapcase.should == "ÄöÜ" + end + + it "updates string metadata" do + swapcased = "Aßet".swapcase + + swapcased.should == "aSSET" + swapcased.size.should == 5 + swapcased.bytesize.should == 5 + swapcased.ascii_only?.should be_true + end + end + + describe "ASCII-only case mapping" do + it "does not swapcase non-ASCII characters" do + "aßet".swapcase(:ascii).should == "AßET" + end + + it "works with substrings" do + "prefix aTé"[-3..-1].swapcase(:ascii).should == "Até" + end + end + + describe "full Unicode case mapping adapted for Turkic languages" do + it "swaps case of ASCII characters according to Turkic semantics" do + "aiS".swapcase(:turkic).should == "Aİs" + end + + it "allows Lithuanian as an extra option" do + "aiS".swapcase(:turkic, :lithuanian).should == "Aİs" + end + + it "does not allow any other additional option" do + -> { "aiS".swapcase(:turkic, :ascii) }.should raise_error(ArgumentError) + end + end + + describe "full Unicode case mapping adapted for Lithuanian" do + it "currently works the same as full Unicode case mapping" do + "Iß".swapcase(:lithuanian).should == "iSS" + end + + it "allows Turkic as an extra option (and applies Turkic semantics)" do + "iS".swapcase(:lithuanian, :turkic).should == "İs" + end + + it "does not allow any other additional option" do + -> { "aiS".swapcase(:lithuanian, :ascii) }.should raise_error(ArgumentError) + end + end + + it "does not allow the :fold option for upcasing" do + -> { "abc".swapcase(:fold) }.should raise_error(ArgumentError) + end + + it "does not allow invalid options" do + -> { "abc".swapcase(:invalid_option) }.should raise_error(ArgumentError) + end + + it "returns String instances when called on a subclass" do + 
StringSpecs::MyString.new("").swapcase.should be_an_instance_of(String) + StringSpecs::MyString.new("hello").swapcase.should be_an_instance_of(String) + end +end + +describe "String#swapcase!" do + it "modifies self in place" do + a = "cYbEr_PuNk11" + a.swapcase!.should equal(a) + a.should == "CyBeR_pUnK11" + end + + it "modifies self in place for non-ascii-compatible encodings" do + a = "cYbEr_PuNk11".encode("utf-16le") + a.swapcase! + a.should == "CyBeR_pUnK11".encode("utf-16le") + end + + describe "full Unicode case mapping" do + it "modifies self in place for all of Unicode with no option" do + a = "äÖü" + a.swapcase! + a.should == "ÄöÜ" + end + + it "works for non-ascii-compatible encodings" do + a = "äÖü".encode("utf-16le") + a.swapcase! + a.should == "ÄöÜ".encode("utf-16le") + end + + it "updates string metadata" do + swapcased = "Aßet" + swapcased.swapcase! + + swapcased.should == "aSSET" + swapcased.size.should == 5 + swapcased.bytesize.should == 5 + swapcased.ascii_only?.should be_true + end + end + + describe "modifies self in place for ASCII-only case mapping" do + it "does not swapcase non-ASCII characters" do + a = "aßet" + a.swapcase!(:ascii) + a.should == "AßET" + end + + it "works for non-ascii-compatible encodings" do + a = "aBc".encode("utf-16le") + a.swapcase!(:ascii) + a.should == "AbC".encode("utf-16le") + end + end + + describe "modifies self in place for full Unicode case mapping adapted for Turkic languages" do + it "swaps case of ASCII characters according to Turkic semantics" do + a = "aiS" + a.swapcase!(:turkic) + a.should == "Aİs" + end + + it "allows Lithuanian as an extra option" do + a = "aiS" + a.swapcase!(:turkic, :lithuanian) + a.should == "Aİs" + end + + it "does not allow any other additional option" do + -> { a = "aiS"; a.swapcase!(:turkic, :ascii) }.should raise_error(ArgumentError) + end + end + + describe "full Unicode case mapping adapted for Lithuanian" do + it "currently works the same as full Unicode case mapping" do + a 
= "Iß" + a.swapcase!(:lithuanian) + a.should == "iSS" + end + + it "allows Turkic as an extra option (and applies Turkic semantics)" do + a = "iS" + a.swapcase!(:lithuanian, :turkic) + a.should == "İs" + end + + it "does not allow any other additional option" do + -> { a = "aiS"; a.swapcase!(:lithuanian, :ascii) }.should raise_error(ArgumentError) + end + end + + it "does not allow the :fold option for upcasing" do + -> { a = "abc"; a.swapcase!(:fold) }.should raise_error(ArgumentError) + end + + it "does not allow invalid options" do + -> { a = "abc"; a.swapcase!(:invalid_option) }.should raise_error(ArgumentError) + end + + it "returns nil if no modifications were made" do + a = "+++---111222???" + a.swapcase!.should == nil + a.should == "+++---111222???" + + "".swapcase!.should == nil + end + + it "raises a FrozenError when self is frozen" do + ["", "hello"].each do |a| + a.freeze + -> { a.swapcase! }.should raise_error(FrozenError) + end + end +end diff --git a/spec/ruby/core/string/to_c_spec.rb b/spec/ruby/core/string/to_c_spec.rb new file mode 100644 index 0000000000..1813890e72 --- /dev/null +++ b/spec/ruby/core/string/to_c_spec.rb @@ -0,0 +1,53 @@ +require_relative '../../spec_helper' +require_relative '../../shared/kernel/complex' +require_relative 'fixtures/to_c' + +describe "String#to_c" do + it_behaves_like :kernel_complex, :to_c_method, StringSpecs +end + +describe "String#to_c" do + it "returns a complex number with 0 as the real part, 0 as the imaginary part for unrecognised Strings" do + 'ruby'.to_c.should == Complex(0, 0) + end + + it "ignores trailing garbage" do + '79+4iruby'.to_c.should == Complex(79, 4) + '7__9+4__0i'.to_c.should == Complex(7, 0) + end + + context "it treats special float value strings as characters" do + it "parses any string that starts with 'I' as 1i" do + 'Infinity'.to_c.should == Complex(0, 1) + '-Infinity'.to_c.should == Complex(0, -1) + 'Insecure'.to_c.should == Complex(0, 1) + '-Insecure'.to_c.should == Complex(0, -1) + 
end + + it "does not parse any numeric information in 'NaN'" do + 'NaN'.to_c.should == Complex(0, 0) + end + end + + it "allows null-byte" do + "1-2i\0".to_c.should == Complex(1, -2) + "1\0-2i".to_c.should == Complex(1, 0) + "\01-2i".to_c.should == Complex(0, 0) + end + + it "raises Encoding::CompatibilityError if String is in not ASCII-compatible encoding" do + -> { + '79+4i'.encode("UTF-16").to_c + }.should raise_error(Encoding::CompatibilityError, "ASCII incompatible encoding: UTF-16") + end + + it "treats a sequence of underscores as an end of Complex string" do + "5+3_1i".to_c.should == Complex(5, 31) + "5+3__1i".to_c.should == Complex(5) + "5+3___1i".to_c.should == Complex(5) + + "12_3".to_c.should == Complex(123) + "12__3".to_c.should == Complex(12) + "12___3".to_c.should == Complex(12) + end +end diff --git a/spec/ruby/core/string/to_f_spec.rb b/spec/ruby/core/string/to_f_spec.rb new file mode 100644 index 0000000000..abfd2517b6 --- /dev/null +++ b/spec/ruby/core/string/to_f_spec.rb @@ -0,0 +1,142 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +# src.scan(/[+-]?[\d_]+\.[\d_]+(e[+-]?[\d_]+)?\b|[+-]?[\d_]+e[+-]?[\d_]+\b/i) + +describe "String#to_f" do + it "treats leading characters of self as a floating point number" do + "123.45e1".to_f.should == 1234.5 + "45.67 degrees".to_f.should == 45.67 + "0".to_f.should == 0.0 + + ".5".to_f.should == 0.5 + ".5e1".to_f.should == 5.0 + "5.".to_f.should == 5.0 + "5e".to_f.should == 5.0 + "5E".to_f.should == 5.0 + end + + it "treats special float value strings as characters" do + "NaN".to_f.should == 0 + "Infinity".to_f.should == 0 + "-Infinity".to_f.should == 0 + end + + it "allows for varying case" do + "123.45e1".to_f.should == 1234.5 + "123.45E1".to_f.should == 1234.5 + end + + it "allows for varying signs" do + "+123.45e1".to_f.should == +123.45e1 + "-123.45e1".to_f.should == -123.45e1 + "123.45e+1".to_f.should == 123.45e+1 + "123.45e-1".to_f.should == 123.45e-1 + 
"+123.45e+1".to_f.should == +123.45e+1 + "+123.45e-1".to_f.should == +123.45e-1 + "-123.45e+1".to_f.should == -123.45e+1 + "-123.45e-1".to_f.should == -123.45e-1 + end + + it "allows for underscores, even in the decimal side" do + "1_234_567.890_1".to_f.should == 1_234_567.890_1 + end + + it "returns 0 for strings with leading underscores" do + "_9".to_f.should == 0 + end + + it "stops if the underscore is not followed or preceded by a number" do + "1__2".to_f.should == 1.0 + "1_.2".to_f.should == 1.0 + "1._2".to_f.should == 1.0 + "1.2_e2".to_f.should == 1.2 + "1.2e_2".to_f.should == 1.2 + "1_x2".to_f.should == 1.0 + "1x_2".to_f.should == 1.0 + "+_1".to_f.should == 0.0 + "-_1".to_f.should == 0.0 + end + + it "does not allow prefixes to autodetect the base" do + "0b10".to_f.should == 0 + "010".to_f.should == 10 + "0o10".to_f.should == 0 + "0d10".to_f.should == 0 + "0x10".to_f.should == 0 + end + + it "treats any non-numeric character other than '.', 'e' and '_' as terminals" do + "blah".to_f.should == 0 + "1b5".to_f.should == 1 + "1d5".to_f.should == 1 + "1o5".to_f.should == 1 + "1xx5".to_f.should == 1 + "x5".to_f.should == 0 + end + + it "takes an optional sign" do + "-45.67 degrees".to_f.should == -45.67 + "+45.67 degrees".to_f.should == 45.67 + "-5_5e-5_0".to_f.should == -55e-50 + "-".to_f.should == 0.0 + (1.0 / "-0".to_f).to_s.should == "-Infinity" + end + + it "treats a second 'e' as terminal" do + "1.234e1e2".to_f.should == 1.234e1 + end + + it "treats a second '.' as terminal" do + "1.2.3".to_f.should == 1.2 + end + + it "treats a '.' 
after an 'e' as terminal" do + "1.234e1.9".to_f.should == 1.234e1 + end + + it "returns 0.0 if the conversion fails" do + "bad".to_f.should == 0.0 + "thx1138".to_f.should == 0.0 + end + + it "ignores leading and trailing whitespace" do + " 1.2".to_f.should == 1.2 + "1.2 ".to_f.should == 1.2 + " 1.2 ".to_f.should == 1.2 + "\t1.2".to_f.should == 1.2 + "\n1.2".to_f.should == 1.2 + "\v1.2".to_f.should == 1.2 + "\f1.2".to_f.should == 1.2 + "\r1.2".to_f.should == 1.2 + end + + it "treats non-printable ASCII characters as terminals" do + "\0001.2".to_f.should == 0 + "\0011.2".to_f.should == 0 + "\0371.2".to_f.should == 0 + "\1771.2".to_f.should == 0 + "\2001.2".b.to_f.should == 0 + "\3771.2".b.to_f.should == 0 + end + + ruby_version_is "3.2.3" do + it "raises Encoding::CompatibilityError if String is in not ASCII-compatible encoding" do + -> { + '1.2'.encode("UTF-16").to_f + }.should raise_error(Encoding::CompatibilityError, "ASCII incompatible encoding: UTF-16") + end + end + + it "allows String representation without a fractional part" do + "1.".to_f.should == 1.0 + "+1.".to_f.should == 1.0 + "-1.".to_f.should == -1.0 + "1.e+0".to_f.should == 1.0 + "1.e+0".to_f.should == 1.0 + + ruby_bug "#20705", ""..."3.4" do + "1.e-2".to_f.should be_close(0.01, TOLERANCE) + end + end +end diff --git a/spec/ruby/core/string/to_i_spec.rb b/spec/ruby/core/string/to_i_spec.rb new file mode 100644 index 0000000000..39f69acda3 --- /dev/null +++ b/spec/ruby/core/string/to_i_spec.rb @@ -0,0 +1,349 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#to_i" do + it "returns 0 for strings with leading underscores" do + "_123".to_i.should == 0 + end + + it "ignores underscores in between the digits" do + "1_2_3asdf".to_i.should == 123 + end + + it "ignores multiple non-consecutive underscores when the first digit is 0" do + (2..16).each do |base| + "0_0_010".to_i(base).should == base; + end + end + + it "bails out at the first double underscore if the 
first digit is 0" do + (2..16).each do |base| + "010__1".to_i(base).should == base; + end + end + + it "ignores leading whitespaces" do + [ " 123", " 123", "\r\n\r\n123", "\t\t123", + "\r\n\t\n123", " \t\n\r\t 123"].each do |str| + str.to_i.should == 123 + end + end + + it "ignores subsequent invalid characters" do + "123asdf".to_i.should == 123 + "123#123".to_i.should == 123 + "123 456".to_i.should == 123 + end + + it "returns 0 if self is no valid integer-representation" do + [ "++2", "+-2", "--2" ].each do |str| + str.to_i.should == 0 + end + end + + it "accepts '+' at the beginning of a String" do + "+0d56".to_i.should == 56 + end + + it "interprets leading characters as a number in the given base" do + "100110010010".to_i(2).should == 0b100110010010 + "100110201001".to_i(3).should == 186409 + "103110201001".to_i(4).should == 5064769 + "103110241001".to_i(5).should == 55165126 + "153110241001".to_i(6).should == 697341529 + "153160241001".to_i(7).should == 3521513430 + "153160241701".to_i(8).should == 14390739905 + "853160241701".to_i(9).should == 269716550518 + "853160241791".to_i(10).should == 853160241791 + + "F00D_BE_1337".to_i(16).should == 0xF00D_BE_1337 + "-hello_world".to_i(32).should == -18306744 + "abcXYZ".to_i(36).should == 623741435 + + ("z" * 24).to_i(36).should == 22452257707354557240087211123792674815 + + "5e10".to_i.should == 5 + end + + it "auto-detects base 8 via leading 0 when base = 0" do + "01778".to_i(0).should == 0177 + "-01778".to_i(0).should == -0177 + end + + it "auto-detects base 2 via 0b when base = 0" do + "0b112".to_i(0).should == 0b11 + "-0b112".to_i(0).should == -0b11 + end + + it "auto-detects base 10 via 0d when base = 0" do + "0d19A".to_i(0).should == 19 + "-0d19A".to_i(0).should == -19 + end + + it "auto-detects base 8 via 0o when base = 0" do + "0o178".to_i(0).should == 0o17 + "-0o178".to_i(0).should == -0o17 + end + + it "auto-detects base 16 via 0x when base = 0" do + "0xFAZ".to_i(0).should == 0xFA + "-0xFAZ".to_i(0).should 
== -0xFA + end + + it "auto-detects base 10 with no base specifier when base = 0" do + "1234567890ABC".to_i(0).should == 1234567890 + "-1234567890ABC".to_i(0).should == -1234567890 + end + + it "doesn't handle foreign base specifiers when base is > 0" do + [2, 3, 4, 8, 10].each do |base| + "0111".to_i(base).should == "111".to_i(base) + + "0b11".to_i(base).should == (base == 2 ? 0b11 : 0) + "0d11".to_i(base).should == (base == 10 ? 0d11 : 0) + "0o11".to_i(base).should == (base == 8 ? 0o11 : 0) + "0xFA".to_i(base).should == 0 + end + + "0xD00D".to_i(16).should == 0xD00D + + "0b11".to_i(16).should == 0xb11 + "0d11".to_i(16).should == 0xd11 + "0o11".to_i(25).should == 15026 + "0x11".to_i(34).should == 38183 + + "0B11".to_i(16).should == 0xb11 + "0D11".to_i(16).should == 0xd11 + "0O11".to_i(25).should == 15026 + "0X11".to_i(34).should == 38183 + end + + it "tries to convert the base to an integer using to_int" do + obj = mock('8') + obj.should_receive(:to_int).and_return(8) + + "777".to_i(obj).should == 0777 + end + + it "requires that the sign if any appears before the base specifier" do + "0b-1".to_i( 2).should == 0 + "0d-1".to_i(10).should == 0 + "0o-1".to_i( 8).should == 0 + "0x-1".to_i(16).should == 0 + + "0b-1".to_i(2).should == 0 + "0o-1".to_i(8).should == 0 + "0d-1".to_i(10).should == 0 + "0x-1".to_i(16).should == 0 + end + + it "raises an ArgumentError for illegal bases (1, < 0 or > 36)" do + -> { "".to_i(1) }.should raise_error(ArgumentError) + -> { "".to_i(-1) }.should raise_error(ArgumentError) + -> { "".to_i(37) }.should raise_error(ArgumentError) + end + + it "returns an Integer for long strings with trailing spaces" do + "0 ".to_i.should == 0 + "0 ".to_i.should be_an_instance_of(Integer) + + "10 ".to_i.should == 10 + "10 ".to_i.should be_an_instance_of(Integer) + + "-10 ".to_i.should == -10 + "-10 ".to_i.should be_an_instance_of(Integer) + end + + it "returns an Integer for long strings with leading spaces" do + " 0".to_i.should == 0 + " 0".to_i.should 
be_an_instance_of(Integer) + + " 10".to_i.should == 10 + " 10".to_i.should be_an_instance_of(Integer) + + " -10".to_i.should == -10 + " -10".to_i.should be_an_instance_of(Integer) + end + + it "returns the correct Integer for long strings" do + "245789127594125924165923648312749312749327482".to_i.should == 245789127594125924165923648312749312749327482 + "-245789127594125924165923648312749312749327482".to_i.should == -245789127594125924165923648312749312749327482 + end +end + +describe "String#to_i with bases" do + it "parses a String in base 2" do + str = "10" * 50 + str.to_i(2).to_s(2).should == str + end + + it "parses a String in base 3" do + str = "120" * 33 + str.to_i(3).to_s(3).should == str + end + + it "parses a String in base 4" do + str = "1230" * 25 + str.to_i(4).to_s(4).should == str + end + + it "parses a String in base 5" do + str = "12340" * 20 + str.to_i(5).to_s(5).should == str + end + + it "parses a String in base 6" do + str = "123450" * 16 + str.to_i(6).to_s(6).should == str + end + + it "parses a String in base 7" do + str = "1234560" * 14 + str.to_i(7).to_s(7).should == str + end + + it "parses a String in base 8" do + str = "12345670" * 12 + str.to_i(8).to_s(8).should == str + end + + it "parses a String in base 9" do + str = "123456780" * 11 + str.to_i(9).to_s(9).should == str + end + + it "parses a String in base 10" do + str = "1234567890" * 10 + str.to_i(10).to_s(10).should == str + end + + it "parses a String in base 11" do + str = "1234567890a" * 9 + str.to_i(11).to_s(11).should == str + end + + it "parses a String in base 12" do + str = "1234567890ab" * 8 + str.to_i(12).to_s(12).should == str + end + + it "parses a String in base 13" do + str = "1234567890abc" * 7 + str.to_i(13).to_s(13).should == str + end + + it "parses a String in base 14" do + str = "1234567890abcd" * 7 + str.to_i(14).to_s(14).should == str + end + + it "parses a String in base 15" do + str = "1234567890abcde" * 6 + str.to_i(15).to_s(15).should == str + end + + it 
"parses a String in base 16" do + str = "1234567890abcdef" * 6 + str.to_i(16).to_s(16).should == str + end + + it "parses a String in base 17" do + str = "1234567890abcdefg" * 5 + str.to_i(17).to_s(17).should == str + end + + it "parses a String in base 18" do + str = "1234567890abcdefgh" * 5 + str.to_i(18).to_s(18).should == str + end + + it "parses a String in base 19" do + str = "1234567890abcdefghi" * 5 + str.to_i(19).to_s(19).should == str + end + + it "parses a String in base 20" do + str = "1234567890abcdefghij" * 5 + str.to_i(20).to_s(20).should == str + end + + it "parses a String in base 21" do + str = "1234567890abcdefghijk" * 4 + str.to_i(21).to_s(21).should == str + end + + it "parses a String in base 22" do + str = "1234567890abcdefghijkl" * 4 + str.to_i(22).to_s(22).should == str + end + + it "parses a String in base 23" do + str = "1234567890abcdefghijklm" * 4 + str.to_i(23).to_s(23).should == str + end + + it "parses a String in base 24" do + str = "1234567890abcdefghijklmn" * 4 + str.to_i(24).to_s(24).should == str + end + + it "parses a String in base 25" do + str = "1234567890abcdefghijklmno" * 4 + str.to_i(25).to_s(25).should == str + end + + it "parses a String in base 26" do + str = "1234567890abcdefghijklmnop" * 3 + str.to_i(26).to_s(26).should == str + end + + it "parses a String in base 27" do + str = "1234567890abcdefghijklmnopq" * 3 + str.to_i(27).to_s(27).should == str + end + + it "parses a String in base 28" do + str = "1234567890abcdefghijklmnopqr" * 3 + str.to_i(28).to_s(28).should == str + end + + it "parses a String in base 29" do + str = "1234567890abcdefghijklmnopqrs" * 3 + str.to_i(29).to_s(29).should == str + end + + it "parses a String in base 30" do + str = "1234567890abcdefghijklmnopqrst" * 3 + str.to_i(30).to_s(30).should == str + end + + it "parses a String in base 31" do + str = "1234567890abcdefghijklmnopqrstu" * 3 + str.to_i(31).to_s(31).should == str + end + + it "parses a String in base 32" do + str = 
"1234567890abcdefghijklmnopqrstuv" * 3 + str.to_i(32).to_s(32).should == str + end + + it "parses a String in base 33" do + str = "1234567890abcdefghijklmnopqrstuvw" * 3 + str.to_i(33).to_s(33).should == str + end + + it "parses a String in base 34" do + str = "1234567890abcdefghijklmnopqrstuvwx" * 2 + str.to_i(34).to_s(34).should == str + end + + it "parses a String in base 35" do + str = "1234567890abcdefghijklmnopqrstuvwxy" * 2 + str.to_i(35).to_s(35).should == str + end + + it "parses a String in base 36" do + str = "1234567890abcdefghijklmnopqrstuvwxyz" * 2 + str.to_i(36).to_s(36).should == str + end +end diff --git a/spec/ruby/core/string/to_r_spec.rb b/spec/ruby/core/string/to_r_spec.rb new file mode 100644 index 0000000000..4ffbb10d98 --- /dev/null +++ b/spec/ruby/core/string/to_r_spec.rb @@ -0,0 +1,62 @@ +require_relative '../../spec_helper' + +describe "String#to_r" do + it "returns a Rational object" do + String.new.to_r.should be_an_instance_of(Rational) + end + + it "returns (0/1) for the empty String" do + "".to_r.should == Rational(0, 1) + end + + it "returns (n/1) for a String starting with a decimal _n_" do + "2".to_r.should == Rational(2, 1) + "1765".to_r.should == Rational(1765, 1) + end + + it "ignores trailing characters" do + "2 foo".to_r.should == Rational(2, 1) + "1765, ".to_r.should == Rational(1765, 1) + end + + it "ignores leading spaces" do + " 2".to_r.should == Rational(2, 1) + " 1765, ".to_r.should == Rational(1765, 1) + end + + it "does not ignore arbitrary, non-numeric leading characters" do + "The rational form of 33 is...".to_r.should_not == Rational(33, 1) + "a1765, ".to_r.should_not == Rational(1765, 1) + end + + it "treats leading hyphen as minus signs" do + "-20".to_r.should == Rational(-20, 1) + end + + it "accepts leading plus signs" do + "+20".to_r.should == Rational(20, 1) + end + + it "does not treat a leading period without a numeric prefix as a decimal point" do + ".9".to_r.should_not == Rational(8106479329266893, 
9007199254740992) + end + + it "understands decimal points" do + "3.33".to_r.should == Rational(333, 100) + "-3.33".to_r.should == Rational(-333, 100) + end + + it "ignores underscores between numbers" do + "190_22".to_r.should == Rational(19022, 1) + "-190_22.7".to_r.should == Rational(-190227, 10) + end + + it "understands a forward slash as separating the numerator from the denominator" do + "20/3".to_r.should == Rational(20, 3) + " -19.10/3".to_r.should == Rational(-191, 30) + end + + it "returns (0/1) for Strings it can't parse" do + "glark".to_r.should == Rational(0,1) + end +end diff --git a/spec/ruby/core/string/to_s_spec.rb b/spec/ruby/core/string/to_s_spec.rb new file mode 100644 index 0000000000..e5872745a8 --- /dev/null +++ b/spec/ruby/core/string/to_s_spec.rb @@ -0,0 +1,7 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/to_s' + +describe "String#to_s" do + it_behaves_like :string_to_s, :to_s +end diff --git a/spec/ruby/core/string/to_str_spec.rb b/spec/ruby/core/string/to_str_spec.rb new file mode 100644 index 0000000000..e24262a7ae --- /dev/null +++ b/spec/ruby/core/string/to_str_spec.rb @@ -0,0 +1,7 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/to_s' + +describe "String#to_str" do + it_behaves_like :string_to_s, :to_str +end diff --git a/spec/ruby/core/string/to_sym_spec.rb b/spec/ruby/core/string/to_sym_spec.rb new file mode 100644 index 0000000000..f9135211ce --- /dev/null +++ b/spec/ruby/core/string/to_sym_spec.rb @@ -0,0 +1,7 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' +require_relative 'shared/to_sym' + +describe "String#to_sym" do + it_behaves_like :string_to_sym, :to_sym +end diff --git a/spec/ruby/core/string/tr_s_spec.rb b/spec/ruby/core/string/tr_s_spec.rb new file mode 100644 index 0000000000..dd72da440c --- /dev/null +++ b/spec/ruby/core/string/tr_s_spec.rb @@ -0,0 +1,133 @@ +# -*- encoding: 
utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#tr_s" do + it "returns a string processed according to tr with newly duplicate characters removed" do + "hello".tr_s('l', 'r').should == "hero" + "hello".tr_s('el', '*').should == "h*o" + "hello".tr_s('el', 'hx').should == "hhxo" + "hello".tr_s('o', '.').should == "hell." + end + + it "accepts c1-c2 notation to denote ranges of characters" do + "hello".tr_s('a-y', 'b-z').should == "ifmp" + "123456789".tr_s("2-5", "abcdefg").should == "1abcd6789" + "hello ^--^".tr_s("e-", "__").should == "h_llo ^_^" + "hello ^--^".tr_s("---", "_").should == "hello ^_^" + end + + ruby_bug "#19769", ""..."3.3" do + it "accepts c1-c1 notation to denote range of one character" do + "hello".tr_s('e-e', 'x').should == "hxllo" + "123456789".tr_s("2-23","xy").should == "1xy456789" + "hello ^-^".tr_s("e-", "a-a_").should == "hallo ^_^" + "hello ^-^".tr_s("---o", "_a").should == "hella ^_^" + end + end + + it "pads to_str with its last char if it is shorter than from_string" do + "this".tr_s("this", "x").should == "x" + end + + it "translates chars not in from_string when it starts with a ^" do + "hello".tr_s('^aeiou', '*').should == "*e*o" + "123456789".tr_s("^345", "abc").should == "c345c" + "abcdefghijk".tr_s("^d-g", "9131").should == "1defg1" + + "hello ^_^".tr_s("a-e^e", ".").should == "h.llo ._." 
+ "hello ^_^".tr_s("^^", ".").should == ".^.^" + "hello ^_^".tr_s("^", "x").should == "hello x_x" + "hello ^-^".tr_s("^-^", "x").should == "x^-^" + "hello ^-^".tr_s("^^-^", "x").should == "x^x^" + "hello ^-^".tr_s("^---", "x").should == "x-x" + "hello ^-^".tr_s("^---l-o", "x").should == "xllox-x" + end + + it "tries to convert from_str and to_str to strings using to_str" do + from_str = mock('ab') + from_str.should_receive(:to_str).and_return("ab") + + to_str = mock('AB') + to_str.should_receive(:to_str).and_return("AB") + + "bla".tr_s(from_str, to_str).should == "BlA" + end + + it "returns String instances when called on a subclass" do + StringSpecs::MyString.new("hello").tr_s("e", "a").should be_an_instance_of(String) + end + + # http://redmine.ruby-lang.org/issues/show/1839 + it "can replace a 7-bit ASCII character with a multibyte one" do + a = "uber" + a.encoding.should == Encoding::UTF_8 + b = a.tr_s("u","ü") + b.should == "über" + b.encoding.should == Encoding::UTF_8 + end + + it "can replace multiple 7-bit ASCII characters with a multibyte one" do + a = "uuuber" + a.encoding.should == Encoding::UTF_8 + b = a.tr_s("u","ü") + b.should == "über" + b.encoding.should == Encoding::UTF_8 + end + + it "can replace a multibyte character with a single byte one" do + a = "über" + a.encoding.should == Encoding::UTF_8 + b = a.tr_s("ü","u") + b.should == "uber" + b.encoding.should == Encoding::UTF_8 + end + + it "can replace multiple multibyte characters with a single byte one" do + a = "üüüber" + a.encoding.should == Encoding::UTF_8 + b = a.tr_s("ü","u") + b.should == "uber" + b.encoding.should == Encoding::UTF_8 + end + + it "does not replace a multibyte character where part of the bytes match the tr string" do + str = "椎名深夏" + a = "\u0080\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008E\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009E\u009F" + b = "€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ" + str.tr_s(a, b).should == "椎名深夏" + end 
+ + +end + +describe "String#tr_s!" do + it "modifies self in place" do + s = "hello" + s.tr_s!("l", "r").should == "hero" + s.should == "hero" + end + + it "returns nil if no modification was made" do + s = "hello" + s.tr_s!("za", "yb").should == nil + s.tr_s!("", "").should == nil + s.should == "hello" + end + + it "does not modify self if from_str is empty" do + s = "hello" + s.tr_s!("", "").should == nil + s.should == "hello" + s.tr_s!("", "yb").should == nil + s.should == "hello" + end + + it "raises a FrozenError if self is frozen" do + s = "hello".freeze + -> { s.tr_s!("el", "ar") }.should raise_error(FrozenError) + -> { s.tr_s!("l", "r") }.should raise_error(FrozenError) + -> { s.tr_s!("", "") }.should raise_error(FrozenError) + end +end diff --git a/spec/ruby/core/string/tr_spec.rb b/spec/ruby/core/string/tr_spec.rb new file mode 100644 index 0000000000..75841a974f --- /dev/null +++ b/spec/ruby/core/string/tr_spec.rb @@ -0,0 +1,128 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#tr" do + it "returns a new string with the characters from from_string replaced by the ones in to_string" do + "hello".tr('aeiou', '*').should == "h*ll*" + "hello".tr('el', 'ip').should == "hippo" + "Lisp".tr("Lisp", "Ruby").should == "Ruby" + end + + it "accepts c1-c2 notation to denote ranges of characters" do + "hello".tr('a-y', 'b-z').should == "ifmmp" + "123456789".tr("2-5","abcdefg").should == "1abcd6789" + "hello ^-^".tr("e-", "__").should == "h_llo ^_^" + "hello ^-^".tr("---", "_").should == "hello ^_^" + end + + ruby_bug "#19769", ""..."3.3" do + it "accepts c1-c1 notation to denote range of one character" do + "hello".tr('e-e', 'x').should == "hxllo" + "123456789".tr("2-23","xy").should == "1xy456789" + "hello ^-^".tr("e-", "a-a_").should == "hallo ^_^" + "hello ^-^".tr("---o", "_a").should == "hella ^_^" + end + end + + it "pads to_str with its last char if it is 
shorter than from_string" do + "this".tr("this", "x").should == "xxxx" + "hello".tr("a-z", "A-H.").should == "HE..." + end + + it "raises an ArgumentError a descending range in the replacement as containing just the start character" do + -> { "hello".tr("a-y", "z-b") }.should raise_error(ArgumentError) + end + + it "raises an ArgumentError a descending range in the source as empty" do + -> { "hello".tr("l-a", "z") }.should raise_error(ArgumentError) + end + + it "translates chars not in from_string when it starts with a ^" do + "hello".tr('^aeiou', '*').should == "*e**o" + "123456789".tr("^345", "abc").should == "cc345cccc" + "abcdefghijk".tr("^d-g", "9131").should == "111defg1111" + + "hello ^_^".tr("a-e^e", ".").should == "h.llo ._." + "hello ^_^".tr("^^", ".").should == "......^.^" + "hello ^_^".tr("^", "x").should == "hello x_x" + "hello ^-^".tr("^-^", "x").should == "xxxxxx^-^" + "hello ^-^".tr("^^-^", "x").should == "xxxxxx^x^" + "hello ^-^".tr("^---", "x").should == "xxxxxxx-x" + "hello ^-^".tr("^---l-o", "x").should == "xxlloxx-x" + end + + it "supports non-injective replacements" do + "hello".tr("helo", "1212").should == "12112" + end + + it "tries to convert from_str and to_str to strings using to_str" do + from_str = mock('ab') + from_str.should_receive(:to_str).and_return("ab") + + to_str = mock('AB') + to_str.should_receive(:to_str).and_return("AB") + + "bla".tr(from_str, to_str).should == "BlA" + end + + it "returns Stringinstances when called on a subclass" do + StringSpecs::MyString.new("hello").tr("e", "a").should be_an_instance_of(String) + end + + # http://redmine.ruby-lang.org/issues/show/1839 + it "can replace a 7-bit ASCII character with a multibyte one" do + a = "uber" + a.encoding.should == Encoding::UTF_8 + b = a.tr("u","ü") + b.should == "über" + b.encoding.should == Encoding::UTF_8 + end + + it "can replace a multibyte character with a single byte one" do + a = "über" + a.encoding.should == Encoding::UTF_8 + b = a.tr("ü","u") + b.should 
== "uber" + b.encoding.should == Encoding::UTF_8 + end + + it "does not replace a multibyte character where part of the bytes match the tr string" do + str = "椎名深夏" + a = "\u0080\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008E\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009E\u009F" + b = "€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ" + str.tr(a, b).should == "椎名深夏" + end + +end + +describe "String#tr!" do + it "modifies self in place" do + s = "abcdefghijklmnopqR" + s.tr!("cdefg", "12").should == "ab12222hijklmnopqR" + s.should == "ab12222hijklmnopqR" + end + + it "returns nil if no modification was made" do + s = "hello" + s.tr!("za", "yb").should == nil + s.tr!("", "").should == nil + s.should == "hello" + end + + it "does not modify self if from_str is empty" do + s = "hello" + s.tr!("", "").should == nil + s.should == "hello" + s.tr!("", "yb").should == nil + s.should == "hello" + end + + it "raises a FrozenError if self is frozen" do + s = "abcdefghijklmnopqR".freeze + -> { s.tr!("cdefg", "12") }.should raise_error(FrozenError) + -> { s.tr!("R", "S") }.should raise_error(FrozenError) + -> { s.tr!("", "") }.should raise_error(FrozenError) + end +end diff --git a/spec/ruby/core/string/try_convert_spec.rb b/spec/ruby/core/string/try_convert_spec.rb new file mode 100644 index 0000000000..72ce5dd8b2 --- /dev/null +++ b/spec/ruby/core/string/try_convert_spec.rb @@ -0,0 +1,50 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String.try_convert" do + it "returns the argument if it's a String" do + x = String.new + String.try_convert(x).should equal(x) + end + + it "returns the argument if it's a kind of String" do + x = StringSpecs::MyString.new + String.try_convert(x).should equal(x) + end + + it "returns nil when the argument does not respond to #to_str" do + String.try_convert(Object.new).should be_nil + end + + it "sends #to_str to the argument and returns the result if it's nil" do + obj 
= mock("to_str") + obj.should_receive(:to_str).and_return(nil) + String.try_convert(obj).should be_nil + end + + it "sends #to_str to the argument and returns the result if it's a String" do + x = String.new + obj = mock("to_str") + obj.should_receive(:to_str).and_return(x) + String.try_convert(obj).should equal(x) + end + + it "sends #to_str to the argument and returns the result if it's a kind of String" do + x = StringSpecs::MyString.new + obj = mock("to_str") + obj.should_receive(:to_str).and_return(x) + String.try_convert(obj).should equal(x) + end + + it "sends #to_str to the argument and raises TypeError if it's not a kind of String" do + obj = mock("to_str") + obj.should_receive(:to_str).and_return(Object.new) + -> { String.try_convert obj }.should raise_error(TypeError, "can't convert MockObject to String (MockObject#to_str gives Object)") + end + + it "does not rescue exceptions raised by #to_str" do + obj = mock("to_str") + obj.should_receive(:to_str).and_raise(RuntimeError) + -> { String.try_convert obj }.should raise_error(RuntimeError) + end +end diff --git a/spec/ruby/core/string/uminus_spec.rb b/spec/ruby/core/string/uminus_spec.rb new file mode 100644 index 0000000000..46d88f6704 --- /dev/null +++ b/spec/ruby/core/string/uminus_spec.rb @@ -0,0 +1,6 @@ +require_relative '../../spec_helper' +require_relative 'shared/dedup' + +describe 'String#-@' do + it_behaves_like :string_dedup, :-@ +end diff --git a/spec/ruby/core/string/undump_spec.rb b/spec/ruby/core/string/undump_spec.rb new file mode 100644 index 0000000000..6ff220161c --- /dev/null +++ b/spec/ruby/core/string/undump_spec.rb @@ -0,0 +1,441 @@ +# encoding: utf-8 +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#undump" do + it "does not take into account if a string is frozen" do + '"foo"'.freeze.undump.should_not.frozen? 
+ end + + it "always returns String instance" do + StringSpecs::MyString.new('"foo"').undump.should be_an_instance_of(String) + end + + it "strips outer \"" do + '"foo"'.undump.should == 'foo' + end + + it "returns a string with special characters in \\<char> notation replaced with the characters" do + [ ['"\\a"', "\a"], + ['"\\b"', "\b"], + ['"\\t"', "\t"], + ['"\\n"', "\n"], + ['"\\v"', "\v"], + ['"\\f"', "\f"], + ['"\\r"', "\r"], + ['"\\e"', "\e"] + ].should be_computed_by(:undump) + end + + it "returns a string with unescaped sequences \" and \\" do + [ ['"\\""' , "\""], + ['"\\\\"', "\\"] + ].should be_computed_by(:undump) + end + + it "returns a string with unescaped sequences \\#<char> when # is followed by $, @, {" do + [ ['"\\#$PATH"', "\#$PATH"], + ['"\\#@a"', "\#@a"], + ['"\\#@@a"', "\#@@a"], + ['"\\#{a}"', "\#{a}"] + ].should be_computed_by(:undump) + end + + it "returns a string with # not escaped when followed by any other character" do + [ ['"#"', '#'], + ['"#1"', '#1'] + ].should be_computed_by(:undump) + end + + it "returns a string with printable non-alphanumeric characters" do + [ ['" "', ' '], + ['"!"', '!'], + ['"$"', '$'], + ['"%"', '%'], + ['"&"', '&'], + ['"\'"', '\''], + ['"("', '('], + ['")"', ')'], + ['"*"', '*'], + ['"+"', '+'], + ['","', ','], + ['"-"', '-'], + ['"."', '.'], + ['"/"', '/'], + ['":"', ':'], + ['";"', ';'], + ['"<"', '<'], + ['"="', '='], + ['">"', '>'], + ['"?"', '?'], + ['"@"', '@'], + ['"["', '['], + ['"]"', ']'], + ['"^"', '^'], + ['"_"', '_'], + ['"`"', '`'], + ['"{"', '{'], + ['"|"', '|'], + ['"}"', '}'], + ['"~"', '~'] + ].should be_computed_by(:undump) + end + + it "returns a string with numeric characters unescaped" do + [ ['"0"', "0"], + ['"1"', "1"], + ['"2"', "2"], + ['"3"', "3"], + ['"4"', "4"], + ['"5"', "5"], + ['"6"', "6"], + ['"7"', "7"], + ['"8"', "8"], + ['"9"', "9"], + ].should be_computed_by(:undump) + end + + it "returns a string with upper-case alpha characters unescaped" do + [ ['"A"', 'A'], + 
['"B"', 'B'], + ['"C"', 'C'], + ['"D"', 'D'], + ['"E"', 'E'], + ['"F"', 'F'], + ['"G"', 'G'], + ['"H"', 'H'], + ['"I"', 'I'], + ['"J"', 'J'], + ['"K"', 'K'], + ['"L"', 'L'], + ['"M"', 'M'], + ['"N"', 'N'], + ['"O"', 'O'], + ['"P"', 'P'], + ['"Q"', 'Q'], + ['"R"', 'R'], + ['"S"', 'S'], + ['"T"', 'T'], + ['"U"', 'U'], + ['"V"', 'V'], + ['"W"', 'W'], + ['"X"', 'X'], + ['"Y"', 'Y'], + ['"Z"', 'Z'] + ].should be_computed_by(:undump) + end + + it "returns a string with lower-case alpha characters unescaped" do + [ ['"a"', 'a'], + ['"b"', 'b'], + ['"c"', 'c'], + ['"d"', 'd'], + ['"e"', 'e'], + ['"f"', 'f'], + ['"g"', 'g'], + ['"h"', 'h'], + ['"i"', 'i'], + ['"j"', 'j'], + ['"k"', 'k'], + ['"l"', 'l'], + ['"m"', 'm'], + ['"n"', 'n'], + ['"o"', 'o'], + ['"p"', 'p'], + ['"q"', 'q'], + ['"r"', 'r'], + ['"s"', 's'], + ['"t"', 't'], + ['"u"', 'u'], + ['"v"', 'v'], + ['"w"', 'w'], + ['"x"', 'x'], + ['"y"', 'y'], + ['"z"', 'z'] + ].should be_computed_by(:undump) + end + + it "returns a string with \\x notation replaced with non-printing ASCII character" do + [ ['"\\x00"', 0000.chr.force_encoding('utf-8')], + ['"\\x01"', 0001.chr.force_encoding('utf-8')], + ['"\\x02"', 0002.chr.force_encoding('utf-8')], + ['"\\x03"', 0003.chr.force_encoding('utf-8')], + ['"\\x04"', 0004.chr.force_encoding('utf-8')], + ['"\\x05"', 0005.chr.force_encoding('utf-8')], + ['"\\x06"', 0006.chr.force_encoding('utf-8')], + ['"\\x0E"', 0016.chr.force_encoding('utf-8')], + ['"\\x0F"', 0017.chr.force_encoding('utf-8')], + ['"\\x10"', 0020.chr.force_encoding('utf-8')], + ['"\\x11"', 0021.chr.force_encoding('utf-8')], + ['"\\x12"', 0022.chr.force_encoding('utf-8')], + ['"\\x13"', 0023.chr.force_encoding('utf-8')], + ['"\\x14"', 0024.chr.force_encoding('utf-8')], + ['"\\x15"', 0025.chr.force_encoding('utf-8')], + ['"\\x16"', 0026.chr.force_encoding('utf-8')], + ['"\\x17"', 0027.chr.force_encoding('utf-8')], + ['"\\x18"', 0030.chr.force_encoding('utf-8')], + ['"\\x19"', 0031.chr.force_encoding('utf-8')], + 
['"\\x1A"', 0032.chr.force_encoding('utf-8')], + ['"\\x1C"', 0034.chr.force_encoding('utf-8')], + ['"\\x1D"', 0035.chr.force_encoding('utf-8')], + ['"\\x1E"', 0036.chr.force_encoding('utf-8')], + ['"\\x1F"', 0037.chr.force_encoding('utf-8')], + ['"\\x7F"', 0177.chr.force_encoding('utf-8')], + ['"\\x80"', 0200.chr.force_encoding('utf-8')], + ['"\\x81"', 0201.chr.force_encoding('utf-8')], + ['"\\x82"', 0202.chr.force_encoding('utf-8')], + ['"\\x83"', 0203.chr.force_encoding('utf-8')], + ['"\\x84"', 0204.chr.force_encoding('utf-8')], + ['"\\x85"', 0205.chr.force_encoding('utf-8')], + ['"\\x86"', 0206.chr.force_encoding('utf-8')], + ['"\\x87"', 0207.chr.force_encoding('utf-8')], + ['"\\x88"', 0210.chr.force_encoding('utf-8')], + ['"\\x89"', 0211.chr.force_encoding('utf-8')], + ['"\\x8A"', 0212.chr.force_encoding('utf-8')], + ['"\\x8B"', 0213.chr.force_encoding('utf-8')], + ['"\\x8C"', 0214.chr.force_encoding('utf-8')], + ['"\\x8D"', 0215.chr.force_encoding('utf-8')], + ['"\\x8E"', 0216.chr.force_encoding('utf-8')], + ['"\\x8F"', 0217.chr.force_encoding('utf-8')], + ['"\\x90"', 0220.chr.force_encoding('utf-8')], + ['"\\x91"', 0221.chr.force_encoding('utf-8')], + ['"\\x92"', 0222.chr.force_encoding('utf-8')], + ['"\\x93"', 0223.chr.force_encoding('utf-8')], + ['"\\x94"', 0224.chr.force_encoding('utf-8')], + ['"\\x95"', 0225.chr.force_encoding('utf-8')], + ['"\\x96"', 0226.chr.force_encoding('utf-8')], + ['"\\x97"', 0227.chr.force_encoding('utf-8')], + ['"\\x98"', 0230.chr.force_encoding('utf-8')], + ['"\\x99"', 0231.chr.force_encoding('utf-8')], + ['"\\x9A"', 0232.chr.force_encoding('utf-8')], + ['"\\x9B"', 0233.chr.force_encoding('utf-8')], + ['"\\x9C"', 0234.chr.force_encoding('utf-8')], + ['"\\x9D"', 0235.chr.force_encoding('utf-8')], + ['"\\x9E"', 0236.chr.force_encoding('utf-8')], + ['"\\x9F"', 0237.chr.force_encoding('utf-8')], + ['"\\xA0"', 0240.chr.force_encoding('utf-8')], + ['"\\xA1"', 0241.chr.force_encoding('utf-8')], + ['"\\xA2"', 
0242.chr.force_encoding('utf-8')], + ['"\\xA3"', 0243.chr.force_encoding('utf-8')], + ['"\\xA4"', 0244.chr.force_encoding('utf-8')], + ['"\\xA5"', 0245.chr.force_encoding('utf-8')], + ['"\\xA6"', 0246.chr.force_encoding('utf-8')], + ['"\\xA7"', 0247.chr.force_encoding('utf-8')], + ['"\\xA8"', 0250.chr.force_encoding('utf-8')], + ['"\\xA9"', 0251.chr.force_encoding('utf-8')], + ['"\\xAA"', 0252.chr.force_encoding('utf-8')], + ['"\\xAB"', 0253.chr.force_encoding('utf-8')], + ['"\\xAC"', 0254.chr.force_encoding('utf-8')], + ['"\\xAD"', 0255.chr.force_encoding('utf-8')], + ['"\\xAE"', 0256.chr.force_encoding('utf-8')], + ['"\\xAF"', 0257.chr.force_encoding('utf-8')], + ['"\\xB0"', 0260.chr.force_encoding('utf-8')], + ['"\\xB1"', 0261.chr.force_encoding('utf-8')], + ['"\\xB2"', 0262.chr.force_encoding('utf-8')], + ['"\\xB3"', 0263.chr.force_encoding('utf-8')], + ['"\\xB4"', 0264.chr.force_encoding('utf-8')], + ['"\\xB5"', 0265.chr.force_encoding('utf-8')], + ['"\\xB6"', 0266.chr.force_encoding('utf-8')], + ['"\\xB7"', 0267.chr.force_encoding('utf-8')], + ['"\\xB8"', 0270.chr.force_encoding('utf-8')], + ['"\\xB9"', 0271.chr.force_encoding('utf-8')], + ['"\\xBA"', 0272.chr.force_encoding('utf-8')], + ['"\\xBB"', 0273.chr.force_encoding('utf-8')], + ['"\\xBC"', 0274.chr.force_encoding('utf-8')], + ['"\\xBD"', 0275.chr.force_encoding('utf-8')], + ['"\\xBE"', 0276.chr.force_encoding('utf-8')], + ['"\\xBF"', 0277.chr.force_encoding('utf-8')], + ['"\\xC0"', 0300.chr.force_encoding('utf-8')], + ['"\\xC1"', 0301.chr.force_encoding('utf-8')], + ['"\\xC2"', 0302.chr.force_encoding('utf-8')], + ['"\\xC3"', 0303.chr.force_encoding('utf-8')], + ['"\\xC4"', 0304.chr.force_encoding('utf-8')], + ['"\\xC5"', 0305.chr.force_encoding('utf-8')], + ['"\\xC6"', 0306.chr.force_encoding('utf-8')], + ['"\\xC7"', 0307.chr.force_encoding('utf-8')], + ['"\\xC8"', 0310.chr.force_encoding('utf-8')], + ['"\\xC9"', 0311.chr.force_encoding('utf-8')], + ['"\\xCA"', 0312.chr.force_encoding('utf-8')], + 
['"\\xCB"', 0313.chr.force_encoding('utf-8')], + ['"\\xCC"', 0314.chr.force_encoding('utf-8')], + ['"\\xCD"', 0315.chr.force_encoding('utf-8')], + ['"\\xCE"', 0316.chr.force_encoding('utf-8')], + ['"\\xCF"', 0317.chr.force_encoding('utf-8')], + ['"\\xD0"', 0320.chr.force_encoding('utf-8')], + ['"\\xD1"', 0321.chr.force_encoding('utf-8')], + ['"\\xD2"', 0322.chr.force_encoding('utf-8')], + ['"\\xD3"', 0323.chr.force_encoding('utf-8')], + ['"\\xD4"', 0324.chr.force_encoding('utf-8')], + ['"\\xD5"', 0325.chr.force_encoding('utf-8')], + ['"\\xD6"', 0326.chr.force_encoding('utf-8')], + ['"\\xD7"', 0327.chr.force_encoding('utf-8')], + ['"\\xD8"', 0330.chr.force_encoding('utf-8')], + ['"\\xD9"', 0331.chr.force_encoding('utf-8')], + ['"\\xDA"', 0332.chr.force_encoding('utf-8')], + ['"\\xDB"', 0333.chr.force_encoding('utf-8')], + ['"\\xDC"', 0334.chr.force_encoding('utf-8')], + ['"\\xDD"', 0335.chr.force_encoding('utf-8')], + ['"\\xDE"', 0336.chr.force_encoding('utf-8')], + ['"\\xDF"', 0337.chr.force_encoding('utf-8')], + ['"\\xE0"', 0340.chr.force_encoding('utf-8')], + ['"\\xE1"', 0341.chr.force_encoding('utf-8')], + ['"\\xE2"', 0342.chr.force_encoding('utf-8')], + ['"\\xE3"', 0343.chr.force_encoding('utf-8')], + ['"\\xE4"', 0344.chr.force_encoding('utf-8')], + ['"\\xE5"', 0345.chr.force_encoding('utf-8')], + ['"\\xE6"', 0346.chr.force_encoding('utf-8')], + ['"\\xE7"', 0347.chr.force_encoding('utf-8')], + ['"\\xE8"', 0350.chr.force_encoding('utf-8')], + ['"\\xE9"', 0351.chr.force_encoding('utf-8')], + ['"\\xEA"', 0352.chr.force_encoding('utf-8')], + ['"\\xEB"', 0353.chr.force_encoding('utf-8')], + ['"\\xEC"', 0354.chr.force_encoding('utf-8')], + ['"\\xED"', 0355.chr.force_encoding('utf-8')], + ['"\\xEE"', 0356.chr.force_encoding('utf-8')], + ['"\\xEF"', 0357.chr.force_encoding('utf-8')], + ['"\\xF0"', 0360.chr.force_encoding('utf-8')], + ['"\\xF1"', 0361.chr.force_encoding('utf-8')], + ['"\\xF2"', 0362.chr.force_encoding('utf-8')], + ['"\\xF3"', 
0363.chr.force_encoding('utf-8')], + ['"\\xF4"', 0364.chr.force_encoding('utf-8')], + ['"\\xF5"', 0365.chr.force_encoding('utf-8')], + ['"\\xF6"', 0366.chr.force_encoding('utf-8')], + ['"\\xF7"', 0367.chr.force_encoding('utf-8')], + ['"\\xF8"', 0370.chr.force_encoding('utf-8')], + ['"\\xF9"', 0371.chr.force_encoding('utf-8')], + ['"\\xFA"', 0372.chr.force_encoding('utf-8')], + ['"\\xFB"', 0373.chr.force_encoding('utf-8')], + ['"\\xFC"', 0374.chr.force_encoding('utf-8')], + ['"\\xFD"', 0375.chr.force_encoding('utf-8')], + ['"\\xFE"', 0376.chr.force_encoding('utf-8')], + ['"\\xFF"', 0377.chr.force_encoding('utf-8')] + ].should be_computed_by(:undump) + end + + it "returns a string with \\u{} notation replaced with multi-byte UTF-8 characters" do + [ ['"\u{80}"', 0200.chr('utf-8')], + ['"\u{81}"', 0201.chr('utf-8')], + ['"\u{82}"', 0202.chr('utf-8')], + ['"\u{83}"', 0203.chr('utf-8')], + ['"\u{84}"', 0204.chr('utf-8')], + ['"\u{86}"', 0206.chr('utf-8')], + ['"\u{87}"', 0207.chr('utf-8')], + ['"\u{88}"', 0210.chr('utf-8')], + ['"\u{89}"', 0211.chr('utf-8')], + ['"\u{8a}"', 0212.chr('utf-8')], + ['"\u{8b}"', 0213.chr('utf-8')], + ['"\u{8c}"', 0214.chr('utf-8')], + ['"\u{8d}"', 0215.chr('utf-8')], + ['"\u{8e}"', 0216.chr('utf-8')], + ['"\u{8f}"', 0217.chr('utf-8')], + ['"\u{90}"', 0220.chr('utf-8')], + ['"\u{91}"', 0221.chr('utf-8')], + ['"\u{92}"', 0222.chr('utf-8')], + ['"\u{93}"', 0223.chr('utf-8')], + ['"\u{94}"', 0224.chr('utf-8')], + ['"\u{95}"', 0225.chr('utf-8')], + ['"\u{96}"', 0226.chr('utf-8')], + ['"\u{97}"', 0227.chr('utf-8')], + ['"\u{98}"', 0230.chr('utf-8')], + ['"\u{99}"', 0231.chr('utf-8')], + ['"\u{9a}"', 0232.chr('utf-8')], + ['"\u{9b}"', 0233.chr('utf-8')], + ['"\u{9c}"', 0234.chr('utf-8')], + ['"\u{9d}"', 0235.chr('utf-8')], + ['"\u{9e}"', 0236.chr('utf-8')], + ['"\u{9f}"', 0237.chr('utf-8')], + ].should be_computed_by(:undump) + end + + it "returns a string with \\uXXXX notation replaced with multi-byte UTF-8 characters" do + [ ['"\u0080"', 
0200.chr('utf-8')], + ['"\u0081"', 0201.chr('utf-8')], + ['"\u0082"', 0202.chr('utf-8')], + ['"\u0083"', 0203.chr('utf-8')], + ['"\u0084"', 0204.chr('utf-8')], + ['"\u0086"', 0206.chr('utf-8')], + ['"\u0087"', 0207.chr('utf-8')], + ['"\u0088"', 0210.chr('utf-8')], + ['"\u0089"', 0211.chr('utf-8')], + ['"\u008a"', 0212.chr('utf-8')], + ['"\u008b"', 0213.chr('utf-8')], + ['"\u008c"', 0214.chr('utf-8')], + ['"\u008d"', 0215.chr('utf-8')], + ['"\u008e"', 0216.chr('utf-8')], + ['"\u008f"', 0217.chr('utf-8')], + ['"\u0090"', 0220.chr('utf-8')], + ['"\u0091"', 0221.chr('utf-8')], + ['"\u0092"', 0222.chr('utf-8')], + ['"\u0093"', 0223.chr('utf-8')], + ['"\u0094"', 0224.chr('utf-8')], + ['"\u0095"', 0225.chr('utf-8')], + ['"\u0096"', 0226.chr('utf-8')], + ['"\u0097"', 0227.chr('utf-8')], + ['"\u0098"', 0230.chr('utf-8')], + ['"\u0099"', 0231.chr('utf-8')], + ['"\u009a"', 0232.chr('utf-8')], + ['"\u009b"', 0233.chr('utf-8')], + ['"\u009c"', 0234.chr('utf-8')], + ['"\u009d"', 0235.chr('utf-8')], + ['"\u009e"', 0236.chr('utf-8')], + ['"\u009f"', 0237.chr('utf-8')], + ].should be_computed_by(:undump) + end + + it "undumps correctly string produced from non ASCII-compatible one" do + s = "\u{876}".encode('utf-16be') + s.dump.undump.should == s + + '"\\bv".force_encoding("UTF-16BE")'.undump.should == "\u0876".encode('utf-16be') + end + + it "returns a String in the same encoding as self" do + '"foo"'.encode("ISO-8859-1").undump.encoding.should == Encoding::ISO_8859_1 + '"foo"'.encode('windows-1251').undump.encoding.should == Encoding::Windows_1251 + end + + describe "Limitations" do + it "cannot undump non ASCII-compatible string" do + -> { '"foo"'.encode('utf-16le').undump }.should raise_error(Encoding::CompatibilityError) + end + end + + describe "invalid dump" do + it "raises RuntimeError exception if wrapping \" are missing" do + -> { 'foo'.undump }.should raise_error(RuntimeError, /invalid dumped string/) + -> { '"foo'.undump }.should raise_error(RuntimeError, /unterminated 
dumped string/) + -> { 'foo"'.undump }.should raise_error(RuntimeError, /invalid dumped string/) + -> { "'foo'".undump }.should raise_error(RuntimeError, /invalid dumped string/) + end + + it "raises RuntimeError if there is incorrect \\x sequence" do + -> { '"\x"'.undump }.should raise_error(RuntimeError, /invalid hex escape/) + -> { '"\\x3y"'.undump }.should raise_error(RuntimeError, /invalid hex escape/) + end + + it "raises RuntimeError in there is incorrect \\u sequence" do + -> { '"\\u"'.undump }.should raise_error(RuntimeError, /invalid Unicode escape/) + -> { '"\\u{"'.undump }.should raise_error(RuntimeError, /invalid Unicode escape/) + -> { '"\\u{3042"'.undump }.should raise_error(RuntimeError, /invalid Unicode escape/) + -> { '"\\u"'.undump }.should raise_error(RuntimeError, /invalid Unicode escape/) + end + + it "raises RuntimeError if there is malformed dump of non ASCII-compatible string" do + -> { '"".force_encoding("BINARY"'.undump }.should raise_error(RuntimeError, /invalid dumped string/) + -> { '"".force_encoding("Unknown")'.undump }.should raise_error(RuntimeError, /dumped string has unknown encoding name/) + -> { '"".force_encoding()'.undump }.should raise_error(RuntimeError, /invalid dumped string/) + end + + it "raises RuntimeError if string contains \0 character" do + -> { "\"foo\0\"".undump }.should raise_error(RuntimeError, /string contains null byte/) + end + + it "raises RuntimeError if string contains non ASCII character" do + -> { "\"\u3042\"".undump }.should raise_error(RuntimeError, /non-ASCII character detected/) + end + + it "raises RuntimeError if there are some excessive \"" do + -> { '" "" "'.undump }.should raise_error(RuntimeError, /invalid dumped string/) + end + end +end diff --git a/spec/ruby/core/string/unicode_normalize_spec.rb b/spec/ruby/core/string/unicode_normalize_spec.rb new file mode 100644 index 0000000000..2e7d22394a --- /dev/null +++ b/spec/ruby/core/string/unicode_normalize_spec.rb @@ -0,0 +1,116 @@ +# -*- 
encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' + +# Examples taken from http://www.unicode.org/reports/tr15/#Norm_Forms + +describe "String#unicode_normalize" do + before :each do + @accented_f = "\u1e9b\u0323" + @angstrom = "\u212b" + @ohm = "\u2126" + end + + it "normalizes code points in the string according to the form that is specified" do + @accented_f.unicode_normalize(:nfc).should == "\u1e9b\u0323" + @accented_f.unicode_normalize(:nfd).should == "\u017f\u0323\u0307" + @accented_f.unicode_normalize(:nfkc).should == "\u1e69" + @accented_f.unicode_normalize(:nfkd).should == "\u0073\u0323\u0307" + end + + it "defaults to the nfc normalization form if no forms are specified" do + @accented_f.unicode_normalize.should == "\u1e9b\u0323" + @angstrom.unicode_normalize.should == "\u00c5" + @ohm.unicode_normalize.should == "\u03a9" + end + + # http://unicode.org/faq/normalization.html#6 + context "returns normalized form of string by default" do + it "03D3 (ϓ) GREEK UPSILON WITH ACUTE AND HOOK SYMBOL" do + "\u03D3".unicode_normalize(:nfc).should == "\u03D3" + "\u03D3".unicode_normalize(:nfd).should == "\u03D2\u0301" + "\u03D3".unicode_normalize(:nfkc).should == "\u038E" + "\u03D3".unicode_normalize(:nfkd).should == "\u03A5\u0301" + end + + it "03D4 (ϔ) GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL" do + "\u03D4".unicode_normalize(:nfc).should == "\u03D4" + "\u03D4".unicode_normalize(:nfd).should == "\u03D2\u0308" + "\u03D4".unicode_normalize(:nfkc).should == "\u03AB" + "\u03D4".unicode_normalize(:nfkd).should == "\u03A5\u0308" + end + + it "1E9B (ẛ) LATIN SMALL LETTER LONG S WITH DOT ABOVE" do + "\u1E9B".unicode_normalize(:nfc).should == "\u1E9B" + "\u1E9B".unicode_normalize(:nfd).should == "\u017F\u0307" + "\u1E9B".unicode_normalize(:nfkc).should == "\u1E61" + "\u1E9B".unicode_normalize(:nfkd).should == "\u0073\u0307" + end + end + + it "raises an Encoding::CompatibilityError if string is not in an unicode encoding" do + -> do 
+ [0xE0].pack('C').force_encoding("ISO-8859-1").unicode_normalize(:nfd) + end.should raise_error(Encoding::CompatibilityError) + end + + it "raises an ArgumentError if the specified form is invalid" do + -> { + @angstrom.unicode_normalize(:invalid_form) + }.should raise_error(ArgumentError) + end +end + +describe "String#unicode_normalize!" do + it "normalizes code points and modifies the receiving string" do + angstrom = "\u212b" + angstrom.unicode_normalize! + angstrom.should == "\u00c5" + angstrom.should_not == "\u212b" + end + + it "modifies original string (nfc)" do + str = "a\u0300" + str.unicode_normalize!(:nfc) + + str.should_not == "a\u0300" + str.should == "à" + end + + it "modifies self in place (nfd)" do + str = "\u00E0" + str.unicode_normalize!(:nfd) + + str.should_not == "\u00E0" + str.should == "a\u0300" + end + + it "modifies self in place (nfkc)" do + str = "\u1E9B\u0323" + str.unicode_normalize!(:nfkc) + + str.should_not == "\u1E9B\u0323" + str.should == "\u1E69" + end + + it "modifies self in place (nfkd)" do + str = "\u1E9B\u0323" + str.unicode_normalize!(:nfkd) + + str.should_not == "\u1E9B\u0323" + str.should == "s\u0323\u0307" + end + + it "raises an Encoding::CompatibilityError if the string is not in an unicode encoding" do + -> { + [0xE0].pack('C').force_encoding("ISO-8859-1").unicode_normalize! + }.should raise_error(Encoding::CompatibilityError) + end + + it "raises an ArgumentError if the specified form is invalid" do + ohm = "\u2126" + -> { + ohm.unicode_normalize!(:invalid_form) + }.should raise_error(ArgumentError) + end +end diff --git a/spec/ruby/core/string/unicode_normalized_spec.rb b/spec/ruby/core/string/unicode_normalized_spec.rb new file mode 100644 index 0000000000..91cf2086b2 --- /dev/null +++ b/spec/ruby/core/string/unicode_normalized_spec.rb @@ -0,0 +1,75 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' + +describe "String#unicode_normalized?" 
do + before :each do + @nfc_normalized_str = "\u1e9b\u0323" + @nfd_normalized_str = "\u017f\u0323\u0307" + @nfkc_normalized_str = "\u1e69" + @nfkd_normalized_str = "\u0073\u0323\u0307" + end + + it "returns true if string is in the specified normalization form" do + @nfc_normalized_str.unicode_normalized?(:nfc).should == true + @nfd_normalized_str.unicode_normalized?(:nfd).should == true + @nfkc_normalized_str.unicode_normalized?(:nfkc).should == true + @nfkd_normalized_str.unicode_normalized?(:nfkd).should == true + end + + it "returns false if string is not in the supplied normalization form" do + @nfd_normalized_str.unicode_normalized?(:nfc).should == false + @nfc_normalized_str.unicode_normalized?(:nfd).should == false + @nfc_normalized_str.unicode_normalized?(:nfkc).should == false + @nfc_normalized_str.unicode_normalized?(:nfkd).should == false + end + + it "defaults to the nfc normalization form if no forms are specified" do + @nfc_normalized_str.should.unicode_normalized? + @nfd_normalized_str.should_not.unicode_normalized? + end + + it "returns true if string is empty" do + "".should.unicode_normalized? + end + + it "returns true if string does not contain any unicode codepoints" do + "abc".should.unicode_normalized? + end + + it "raises an Encoding::CompatibilityError if the string is not in an unicode encoding" do + -> { @nfc_normalized_str.force_encoding("ISO-8859-1").unicode_normalized? 
}.should raise_error(Encoding::CompatibilityError) + end + + it "raises an ArgumentError if the specified form is invalid" do + -> { @nfc_normalized_str.unicode_normalized?(:invalid_form) }.should raise_error(ArgumentError) + end + + it "returns true if str is in Unicode normalization form (nfc)" do + str = "a\u0300" + str.unicode_normalized?(:nfc).should be_false + str.unicode_normalize!(:nfc) + str.unicode_normalized?(:nfc).should be_true + end + + it "returns true if str is in Unicode normalization form (nfd)" do + str = "a\u00E0" + str.unicode_normalized?(:nfd).should be_false + str.unicode_normalize!(:nfd) + str.unicode_normalized?(:nfd).should be_true + end + + it "returns true if str is in Unicode normalization form (nfkc)" do + str = "a\u0300" + str.unicode_normalized?(:nfkc).should be_false + str.unicode_normalize!(:nfkc) + str.unicode_normalized?(:nfkc).should be_true + end + + it "returns true if str is in Unicode normalization form (nfkd)" do + str = "a\u00E0" + str.unicode_normalized?(:nfkd).should be_false + str.unicode_normalize!(:nfkd) + str.unicode_normalized?(:nfkd).should be_true + end +end diff --git a/spec/ruby/core/string/unpack/a_spec.rb b/spec/ruby/core/string/unpack/a_spec.rb new file mode 100644 index 0000000000..a68e842e15 --- /dev/null +++ b/spec/ruby/core/string/unpack/a_spec.rb @@ -0,0 +1,66 @@ +# encoding: binary +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/string' +require_relative 'shared/taint' + +describe "String#unpack with format 'A'" do + it_behaves_like :string_unpack_basic, 'A' + it_behaves_like :string_unpack_no_platform, 'A' + it_behaves_like :string_unpack_string, 'A' + it_behaves_like :string_unpack_Aa, 'A' + it_behaves_like :string_unpack_taint, 'A' + + it "removes trailing space and NULL bytes from the decoded string" do + [ ["a\x00 b \x00", ["a\x00 b", ""]], + ["a\x00 b \x00 ", ["a\x00 b", ""]], + ["a\x00 b\x00 ", ["a\x00 b", 
""]], + ["a\x00 b\x00", ["a\x00 b", ""]], + ["a\x00 b ", ["a\x00 b", ""]] + ].should be_computed_by(:unpack, "A*A") + end + + it "does not remove whitespace other than space" do + [ ["a\x00 b\x00\f", ["a\x00 b\x00\f"]], + ["a\x00 b\x00\n", ["a\x00 b\x00\n"]], + ["a\x00 b\x00\r", ["a\x00 b\x00\r"]], + ["a\x00 b\x00\t", ["a\x00 b\x00\t"]], + ["a\x00 b\x00\v", ["a\x00 b\x00\v"]], + ].should be_computed_by(:unpack, "A*") + end + + it "decodes into raw (ascii) string values" do + str = "str".dup.force_encoding('UTF-8').unpack("A*")[0] + str.encoding.should == Encoding::BINARY + end + +end + +describe "String#unpack with format 'a'" do + it_behaves_like :string_unpack_basic, 'a' + it_behaves_like :string_unpack_no_platform, 'a' + it_behaves_like :string_unpack_string, 'a' + it_behaves_like :string_unpack_Aa, 'a' + it_behaves_like :string_unpack_taint, 'a' + + it "does not remove trailing whitespace or NULL bytes from the decoded string" do + [ ["a\x00 b \x00", ["a\x00 b \x00"]], + ["a\x00 b \x00 ", ["a\x00 b \x00 "]], + ["a\x00 b\x00 ", ["a\x00 b\x00 "]], + ["a\x00 b\x00", ["a\x00 b\x00"]], + ["a\x00 b ", ["a\x00 b "]], + ["a\x00 b\f", ["a\x00 b\f"]], + ["a\x00 b\n", ["a\x00 b\n"]], + ["a\x00 b\r", ["a\x00 b\r"]], + ["a\x00 b\t", ["a\x00 b\t"]], + ["a\x00 b\v", ["a\x00 b\v"]] + ].should be_computed_by(:unpack, "a*") + end + + it "decodes into raw (ascii) string values" do + str = "".unpack("a*")[0] + str.encoding.should == Encoding::BINARY + end + +end diff --git a/spec/ruby/core/string/unpack/at_spec.rb b/spec/ruby/core/string/unpack/at_spec.rb new file mode 100644 index 0000000000..d4133c23ee --- /dev/null +++ b/spec/ruby/core/string/unpack/at_spec.rb @@ -0,0 +1,29 @@ +# encoding: binary +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' + +describe "String#unpack with format '@'" do + it_behaves_like :string_unpack_basic, '@' + it_behaves_like :string_unpack_no_platform, '@' + + it "moves the read index to 
the byte specified by the count" do + "\x01\x02\x03\x04".unpack("C3@2C").should == [1, 2, 3, 3] + end + + it "implicitly has a count of zero when count is not specified" do + "\x01\x02\x03\x04".unpack("C2@C").should == [1, 2, 1] + end + + it "has no effect when passed the '*' modifier" do + "\x01\x02\x03\x04".unpack("C2@*C").should == [1, 2, 3] + end + + it "positions the read index one beyond the last readable byte in the String" do + "\x01\x02\x03\x04".unpack("C2@4C").should == [1, 2, nil] + end + + it "raises an ArgumentError if the count exceeds the size of the String" do + -> { "\x01\x02\x03\x04".unpack("C2@5C") }.should raise_error(ArgumentError) + end +end diff --git a/spec/ruby/core/string/unpack/b_spec.rb b/spec/ruby/core/string/unpack/b_spec.rb new file mode 100644 index 0000000000..b088f901fc --- /dev/null +++ b/spec/ruby/core/string/unpack/b_spec.rb @@ -0,0 +1,221 @@ +# encoding: binary +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/taint' + +describe "String#unpack with format 'B'" do + it_behaves_like :string_unpack_basic, 'B' + it_behaves_like :string_unpack_no_platform, 'B' + it_behaves_like :string_unpack_taint, 'B' + + it "decodes one bit from each byte for each format character starting with the most significant bit" do + [ ["\x00", "B", ["0"]], + ["\x80", "B", ["1"]], + ["\x0f", "B", ["0"]], + ["\x8f", "B", ["1"]], + ["\x7f", "B", ["0"]], + ["\xff", "B", ["1"]], + ["\x80\x00", "BB", ["1", "0"]], + ["\x8f\x00", "BB", ["1", "0"]], + ["\x80\x0f", "BB", ["1", "0"]], + ["\x80\x8f", "BB", ["1", "1"]], + ["\x80\x80", "BB", ["1", "1"]], + ["\x0f\x80", "BB", ["0", "1"]] + ].should be_computed_by(:unpack) + end + + it "decodes only the number of bits in the string when passed a count" do + "\x83".unpack("B25").should == ["10000011"] + end + + it "decodes multiple differing bit counts from a single string" do + str = 
"\xaa\xaa\xaa\xaa\x55\xaa\xd4\xc3\x6b\xd7\xaa\xd7\xc3\xd4\xaa\x6b\xd7\xaa" + array = str.unpack("B5B6B7B8B9B10B13B14B16B17") + array.should == ["10101", "101010", "1010101", "10101010", "010101011", + "1101010011", "0110101111010", "10101010110101", + "1100001111010100", "10101010011010111"] + end + + it "decodes a directive with a '*' modifier after a directive with a count modifier" do + "\xd4\xc3\x6b\xd7".unpack("B5B*").should == ["11010", "110000110110101111010111"] + end + + it "decodes a directive with a count modifier after a directive with a '*' modifier" do + "\xd4\xc3\x6b\xd7".unpack("B*B5").should == ["11010100110000110110101111010111", ""] + end + + it "decodes the number of bits specified by the count modifier" do + [ ["\x00", "B0", [""]], + ["\x80", "B1", ["1"]], + ["\x7f", "B2", ["01"]], + ["\x8f", "B3", ["100"]], + ["\x7f", "B4", ["0111"]], + ["\xff", "B5", ["11111"]], + ["\xf8", "B6", ["111110"]], + ["\x9c", "B7", ["1001110"]], + ["\xbd", "B8", ["10111101"]], + ["\x80\x80", "B9", ["100000001"]], + ["\x80\x70", "B10", ["1000000001"]], + ["\x80\x20", "B11", ["10000000001"]], + ["\x8f\x10", "B12", ["100011110001"]], + ["\x8f\x0f", "B13", ["1000111100001"]], + ["\x80\x0f", "B14", ["10000000000011"]], + ["\x80\x8f", "B15", ["100000001000111"]], + ["\x0f\x81", "B16", ["0000111110000001"]] + ].should be_computed_by(:unpack) + end + + it "decodes all the bits when passed the '*' modifier" do + [ ["", [""]], + ["\x00", ["00000000"]], + ["\x80", ["10000000"]], + ["\x7f", ["01111111"]], + ["\x81", ["10000001"]], + ["\x0f", ["00001111"]], + ["\x80\x80", ["1000000010000000"]], + ["\x8f\x10", ["1000111100010000"]], + ["\x00\x10", ["0000000000010000"]] + ].should be_computed_by(:unpack, "B*") + end + + it "adds an empty string for each element requested beyond the end of the String" do + [ ["", ["", "", ""]], + ["\x80", ["1", "", ""]], + ["\x80\x08", ["1", "0", ""]] + ].should be_computed_by(:unpack, "BBB") + end + + ruby_version_is ""..."3.3" do + it "ignores 
NULL bytes between directives" do + suppress_warning do + "\x80\x00".unpack("B\x00B").should == ["1", "0"] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x80\x00".unpack("B\x00B") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + "\x80\x00".unpack("B B").should == ["1", "0"] + end + + it "decodes into US-ASCII string values" do + str = "s".dup.force_encoding('UTF-8').unpack("B*")[0] + str.encoding.name.should == 'US-ASCII' + end +end + +describe "String#unpack with format 'b'" do + it_behaves_like :string_unpack_basic, 'b' + it_behaves_like :string_unpack_no_platform, 'b' + it_behaves_like :string_unpack_taint, 'b' + + it "decodes one bit from each byte for each format character starting with the least significant bit" do + [ ["\x00", "b", ["0"]], + ["\x01", "b", ["1"]], + ["\xf0", "b", ["0"]], + ["\xf1", "b", ["1"]], + ["\xfe", "b", ["0"]], + ["\xff", "b", ["1"]], + ["\x01\x00", "bb", ["1", "0"]], + ["\xf1\x00", "bb", ["1", "0"]], + ["\x01\xf0", "bb", ["1", "0"]], + ["\x01\xf1", "bb", ["1", "1"]], + ["\x01\x01", "bb", ["1", "1"]], + ["\xf0\x01", "bb", ["0", "1"]] + ].should be_computed_by(:unpack) + end + + it "decodes only the number of bits in the string when passed a count" do + "\x83".unpack("b25").should == ["11000001"] + end + + it "decodes multiple differing bit counts from a single string" do + str = "\xaa\xaa\xaa\xaa\x55\xaa\xd4\xc3\x6b\xd7\xaa\xd7\xc3\xd4\xaa\x6b\xd7\xaa" + array = str.unpack("b5b6b7b8b9b10b13b14b16b17") + array.should == ["01010", "010101", "0101010", "01010101", "101010100", + "0010101111", "1101011011101", "01010101111010", + "1100001100101011", "01010101110101101"] + end + + it "decodes a directive with a '*' modifier after a directive with a count modifier" do + "\xd4\xc3\x6b\xd7".unpack("b5b*").should == ["00101", "110000111101011011101011"] + end + + it "decodes a directive with a 
count modifier after a directive with a '*' modifier" do + "\xd4\xc3\x6b\xd7".unpack("b*b5").should == ["00101011110000111101011011101011", ""] + end + + it "decodes the number of bits specified by the count modifier" do + [ ["\x00", "b0", [""]], + ["\x01", "b1", ["1"]], + ["\xfe", "b2", ["01"]], + ["\xfc", "b3", ["001"]], + ["\xf7", "b4", ["1110"]], + ["\xff", "b5", ["11111"]], + ["\xfe", "b6", ["011111"]], + ["\xce", "b7", ["0111001"]], + ["\xbd", "b8", ["10111101"]], + ["\x01\xff", "b9", ["100000001"]], + ["\x01\xfe", "b10", ["1000000001"]], + ["\x01\xfc", "b11", ["10000000001"]], + ["\xf1\xf8", "b12", ["100011110001"]], + ["\xe1\xf1", "b13", ["1000011110001"]], + ["\x03\xe0", "b14", ["11000000000001"]], + ["\x47\xc0", "b15", ["111000100000001"]], + ["\x81\x0f", "b16", ["1000000111110000"]] + ].should be_computed_by(:unpack) + end + + it "decodes all the bits when passed the '*' modifier" do + [ ["", [""]], + ["\x00", ["00000000"]], + ["\x80", ["00000001"]], + ["\x7f", ["11111110"]], + ["\x81", ["10000001"]], + ["\x0f", ["11110000"]], + ["\x80\x80", ["0000000100000001"]], + ["\x8f\x10", ["1111000100001000"]], + ["\x00\x10", ["0000000000001000"]] + ].should be_computed_by(:unpack, "b*") + end + + it "adds an empty string for each element requested beyond the end of the String" do + [ ["", ["", "", ""]], + ["\x01", ["1", "", ""]], + ["\x01\x80", ["1", "0", ""]] + ].should be_computed_by(:unpack, "bbb") + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "\x01\x00".unpack("b\x00b").should == ["1", "0"] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x00".unpack("b\x00b") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + "\x01\x00".unpack("b b").should == ["1", "0"] + end + + it "decodes into US-ASCII string values" do + str = 
"s".dup.force_encoding('UTF-8').unpack("b*")[0] + str.encoding.name.should == 'US-ASCII' + end +end diff --git a/spec/ruby/core/string/unpack/c_spec.rb b/spec/ruby/core/string/unpack/c_spec.rb new file mode 100644 index 0000000000..1e9548fb82 --- /dev/null +++ b/spec/ruby/core/string/unpack/c_spec.rb @@ -0,0 +1,75 @@ +# encoding: binary +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' + +describe :string_unpack_8bit, shared: true do + it "decodes one byte for a single format character" do + "abc".unpack(unpack_format()).should == [97] + end + + it "decodes two bytes for two format characters" do + "abc".unpack(unpack_format(nil, 2)).should == [97, 98] + end + + it "decodes the number of bytes requested by the count modifier" do + "abc".unpack(unpack_format(2)).should == [97, 98] + end + + it "decodes the remaining bytes when passed the '*' modifier" do + "abc".unpack(unpack_format('*')).should == [97, 98, 99] + end + + it "decodes the remaining bytes when passed the '*' modifier after another directive" do + "abc".unpack(unpack_format()+unpack_format('*')).should == [97, 98, 99] + end + + it "decodes zero bytes when no bytes remain and the '*' modifier is passed" do + "abc".unpack(unpack_format('*', 2)).should == [97, 98, 99] + end + + it "adds nil for each element requested beyond the end of the String" do + [ ["", [nil, nil, nil]], + ["a", [97, nil, nil]], + ["ab", [97, 98, nil]] + ].should be_computed_by(:unpack, unpack_format(3)) + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "abc".unpack(unpack_format("\000", 2)).should == [97, 98] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + 
"abc".unpack(unpack_format(' ', 2)).should == [97, 98] + end +end + +describe "String#unpack with format 'C'" do + it_behaves_like :string_unpack_basic, 'C' + it_behaves_like :string_unpack_8bit, 'C' + + it "decodes a byte with most significant bit set as a positive number" do + "\xff\x80\x82".unpack('C*').should == [255, 128, 130] + end +end + +describe "String#unpack with format 'c'" do + it_behaves_like :string_unpack_basic, 'c' + it_behaves_like :string_unpack_8bit, 'c' + + it "decodes a byte with most significant bit set as a negative number" do + "\xff\x80\x82".unpack('c*').should == [-1, -128, -126] + end +end diff --git a/spec/ruby/core/string/unpack/comment_spec.rb b/spec/ruby/core/string/unpack/comment_spec.rb new file mode 100644 index 0000000000..050d2b7fc0 --- /dev/null +++ b/spec/ruby/core/string/unpack/comment_spec.rb @@ -0,0 +1,25 @@ +# encoding: binary +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' + +describe "String#unpack" do + it "ignores directives text from '#' to the first newline" do + "\x01\x02\x03".unpack("c#this is a comment\nc").should == [1, 2] + end + + it "ignores directives text from '#' to the end if no newline is present" do + "\x01\x02\x03".unpack("c#this is a comment c").should == [1] + end + + it "ignores comments at the start of the directives string" do + "\x01\x02\x03".unpack("#this is a comment\nc").should == [1] + end + + it "ignores the entire directive string if it is a comment" do + "\x01\x02\x03".unpack("#this is a comment c").should == [] + end + + it "ignores multiple comments" do + "\x01\x02\x03".unpack("c#comment\nc#comment\nc#c").should == [1, 2, 3] + end +end diff --git a/spec/ruby/core/string/unpack/d_spec.rb b/spec/ruby/core/string/unpack/d_spec.rb new file mode 100644 index 0000000000..0e4f57ec04 --- /dev/null +++ b/spec/ruby/core/string/unpack/d_spec.rb @@ -0,0 +1,28 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 
'shared/basic' +require_relative 'shared/float' + +little_endian do + describe "String#unpack with format 'D'" do + it_behaves_like :string_unpack_basic, 'D' + it_behaves_like :string_unpack_double_le, 'D' + end + + describe "String#unpack with format 'd'" do + it_behaves_like :string_unpack_basic, 'd' + it_behaves_like :string_unpack_double_le, 'd' + end +end + +big_endian do + describe "String#unpack with format 'D'" do + it_behaves_like :string_unpack_basic, 'D' + it_behaves_like :string_unpack_double_be, 'D' + end + + describe "String#unpack with format 'd'" do + it_behaves_like :string_unpack_basic, 'd' + it_behaves_like :string_unpack_double_be, 'd' + end +end diff --git a/spec/ruby/core/string/unpack/e_spec.rb b/spec/ruby/core/string/unpack/e_spec.rb new file mode 100644 index 0000000000..c958be1c8b --- /dev/null +++ b/spec/ruby/core/string/unpack/e_spec.rb @@ -0,0 +1,14 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/float' + +describe "String#unpack with format 'E'" do + it_behaves_like :string_unpack_basic, 'E' + it_behaves_like :string_unpack_double_le, 'E' +end + +describe "String#unpack with format 'e'" do + it_behaves_like :string_unpack_basic, 'e' + it_behaves_like :string_unpack_float_le, 'e' +end diff --git a/spec/ruby/core/string/unpack/f_spec.rb b/spec/ruby/core/string/unpack/f_spec.rb new file mode 100644 index 0000000000..ec8b9d435e --- /dev/null +++ b/spec/ruby/core/string/unpack/f_spec.rb @@ -0,0 +1,28 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/float' + +little_endian do + describe "String#unpack with format 'F'" do + it_behaves_like :string_unpack_basic, 'F' + it_behaves_like :string_unpack_float_le, 'F' + end + + describe "String#unpack with format 'f'" do + it_behaves_like :string_unpack_basic, 'f' + it_behaves_like :string_unpack_float_le, 'f' + 
end +end + +big_endian do + describe "String#unpack with format 'F'" do + it_behaves_like :string_unpack_basic, 'F' + it_behaves_like :string_unpack_float_be, 'F' + end + + describe "String#unpack with format 'f'" do + it_behaves_like :string_unpack_basic, 'f' + it_behaves_like :string_unpack_float_be, 'f' + end +end diff --git a/spec/ruby/core/string/unpack/g_spec.rb b/spec/ruby/core/string/unpack/g_spec.rb new file mode 100644 index 0000000000..ffc423b152 --- /dev/null +++ b/spec/ruby/core/string/unpack/g_spec.rb @@ -0,0 +1,14 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/float' + +describe "String#unpack with format 'G'" do + it_behaves_like :string_unpack_basic, 'G' + it_behaves_like :string_unpack_double_be, 'G' +end + +describe "String#unpack with format 'g'" do + it_behaves_like :string_unpack_basic, 'g' + it_behaves_like :string_unpack_float_be, 'g' +end diff --git a/spec/ruby/core/string/unpack/h_spec.rb b/spec/ruby/core/string/unpack/h_spec.rb new file mode 100644 index 0000000000..535836087d --- /dev/null +++ b/spec/ruby/core/string/unpack/h_spec.rb @@ -0,0 +1,159 @@ +# encoding: binary +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/taint' + +describe "String#unpack with format 'H'" do + it_behaves_like :string_unpack_basic, 'H' + it_behaves_like :string_unpack_no_platform, 'H' + it_behaves_like :string_unpack_taint, 'H' + + it "decodes one nibble from each byte for each format character starting with the most significant bit" do + [ ["\x8f", "H", ["8"]], + ["\xf8\x0f", "HH", ["f", "0"]] + ].should be_computed_by(:unpack) + end + + it "decodes only the number of nibbles in the string when passed a count" do + "\xca\xfe".unpack("H5").should == ["cafe"] + end + + it "decodes multiple differing nibble counts from a single string" do + array = 
"\xaa\x55\xaa\xd4\xc3\x6b\xd7\xaa\xd7".unpack("HH2H3H4H5") + array.should == ["a", "55", "aad", "c36b", "d7aad"] + end + + it "decodes a directive with a '*' modifier after a directive with a count modifier" do + "\xaa\x55\xaa\xd4\xc3\x6b".unpack("H3H*").should == ["aa5", "aad4c36b"] + end + + it "decodes a directive with a count modifier after a directive with a '*' modifier" do + "\xaa\x55\xaa\xd4\xc3\x6b".unpack("H*H3").should == ["aa55aad4c36b", ""] + end + + it "decodes the number of nibbles specified by the count modifier" do + [ ["\xab", "H0", [""]], + ["\x00", "H1", ["0"]], + ["\x01", "H2", ["01"]], + ["\x01\x23", "H3", ["012"]], + ["\x01\x23", "H4", ["0123"]], + ["\x01\x23\x45", "H5", ["01234"]] + ].should be_computed_by(:unpack) + end + + it "decodes all the nibbles when passed the '*' modifier" do + [ ["", [""]], + ["\xab", ["ab"]], + ["\xca\xfe", ["cafe"]], + ].should be_computed_by(:unpack, "H*") + end + + it "adds an empty string for each element requested beyond the end of the String" do + [ ["", ["", "", ""]], + ["\x01", ["0", "", ""]], + ["\x01\x80", ["0", "8", ""]] + ].should be_computed_by(:unpack, "HHH") + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "\x01\x10".unpack("H\x00H").should == ["0", "1"] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x10".unpack("H\x00H") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + "\x01\x10".unpack("H H").should == ["0", "1"] + end + + it "should make strings with US_ASCII encoding" do + "\x01".unpack("H")[0].encoding.should == Encoding::US_ASCII + end +end + +describe "String#unpack with format 'h'" do + it_behaves_like :string_unpack_basic, 'h' + it_behaves_like :string_unpack_no_platform, 'h' + it_behaves_like :string_unpack_taint, 'h' + + it "decodes one nibble from each byte for each format 
character starting with the least significant bit" do + [ ["\x8f", "h", ["f"]], + ["\xf8\x0f", "hh", ["8", "f"]] + ].should be_computed_by(:unpack) + end + + it "decodes only the number of nibbles in the string when passed a count" do + "\xac\xef".unpack("h5").should == ["cafe"] + end + + it "decodes multiple differing nibble counts from a single string" do + array = "\xaa\x55\xaa\xd4\xc3\x6b\xd7\xaa\xd7".unpack("hh2h3h4h5") + array.should == ["a", "55", "aa4", "3cb6", "7daa7"] + end + + it "decodes a directive with a '*' modifier after a directive with a count modifier" do + "\xba\x55\xaa\xd4\xc3\x6b".unpack("h3h*").should == ["ab5", "aa4d3cb6"] + end + + it "decodes a directive with a count modifier after a directive with a '*' modifier" do + "\xba\x55\xaa\xd4\xc3\x6b".unpack("h*h3").should == ["ab55aa4d3cb6", ""] + end + + it "decodes the number of nibbles specified by the count modifier" do + [ ["\xab", "h0", [""]], + ["\x00", "h1", ["0"]], + ["\x01", "h2", ["10"]], + ["\x01\x23", "h3", ["103"]], + ["\x01\x23", "h4", ["1032"]], + ["\x01\x23\x45", "h5", ["10325"]] + ].should be_computed_by(:unpack) + end + + it "decodes all the nibbles when passed the '*' modifier" do + [ ["", [""]], + ["\xab", ["ba"]], + ["\xac\xef", ["cafe"]], + ].should be_computed_by(:unpack, "h*") + end + + it "adds an empty string for each element requested beyond the end of the String" do + [ ["", ["", "", ""]], + ["\x01", ["1", "", ""]], + ["\x01\x80", ["1", "0", ""]] + ].should be_computed_by(:unpack, "hhh") + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "\x01\x10".unpack("h\x00h").should == ["1", "0"] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x10".unpack("h\x00h") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + "\x01\x10".unpack("h h").should == ["1", "0"] + end 
+ + it "should make strings with US_ASCII encoding" do + "\x01".unpack("h")[0].encoding.should == Encoding::US_ASCII + end +end diff --git a/spec/ruby/core/string/unpack/i_spec.rb b/spec/ruby/core/string/unpack/i_spec.rb new file mode 100644 index 0000000000..b4bbba1923 --- /dev/null +++ b/spec/ruby/core/string/unpack/i_spec.rb @@ -0,0 +1,152 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/integer' + +describe "String#unpack with format 'I'" do + describe "with modifier '<'" do + it_behaves_like :string_unpack_32bit_le, 'I<' + it_behaves_like :string_unpack_32bit_le_unsigned, 'I<' + end + + describe "with modifier '<' and '_'" do + it_behaves_like :string_unpack_32bit_le, 'I<_' + it_behaves_like :string_unpack_32bit_le, 'I_<' + it_behaves_like :string_unpack_32bit_le_unsigned, 'I<_' + it_behaves_like :string_unpack_32bit_le_unsigned, 'I_<' + end + + describe "with modifier '<' and '!'" do + it_behaves_like :string_unpack_32bit_le, 'I<!' + it_behaves_like :string_unpack_32bit_le, 'I!<' + it_behaves_like :string_unpack_32bit_le_unsigned, 'I<!' + it_behaves_like :string_unpack_32bit_le_unsigned, 'I!<' + end + + describe "with modifier '>'" do + it_behaves_like :string_unpack_32bit_be, 'I>' + it_behaves_like :string_unpack_32bit_be_unsigned, 'I>' + end + + describe "with modifier '>' and '_'" do + it_behaves_like :string_unpack_32bit_be, 'I>_' + it_behaves_like :string_unpack_32bit_be, 'I_>' + it_behaves_like :string_unpack_32bit_be_unsigned, 'I>_' + it_behaves_like :string_unpack_32bit_be_unsigned, 'I_>' + end + + describe "with modifier '>' and '!'" do + it_behaves_like :string_unpack_32bit_be, 'I>!' + it_behaves_like :string_unpack_32bit_be, 'I!>' + it_behaves_like :string_unpack_32bit_be_unsigned, 'I>!' 
+ it_behaves_like :string_unpack_32bit_be_unsigned, 'I!>' + end +end + +describe "String#unpack with format 'i'" do + describe "with modifier '<'" do + it_behaves_like :string_unpack_32bit_le, 'i<' + it_behaves_like :string_unpack_32bit_le_signed, 'i<' + end + + describe "with modifier '<' and '_'" do + it_behaves_like :string_unpack_32bit_le, 'i<_' + it_behaves_like :string_unpack_32bit_le, 'i_<' + it_behaves_like :string_unpack_32bit_le_signed, 'i<_' + it_behaves_like :string_unpack_32bit_le_signed, 'i_<' + end + + describe "with modifier '<' and '!'" do + it_behaves_like :string_unpack_32bit_le, 'i<!' + it_behaves_like :string_unpack_32bit_le, 'i!<' + it_behaves_like :string_unpack_32bit_le_signed, 'i<!' + it_behaves_like :string_unpack_32bit_le_signed, 'i!<' + end + + describe "with modifier '>'" do + it_behaves_like :string_unpack_32bit_be, 'i>' + it_behaves_like :string_unpack_32bit_be_signed, 'i>' + end + + describe "with modifier '>' and '_'" do + it_behaves_like :string_unpack_32bit_be, 'i>_' + it_behaves_like :string_unpack_32bit_be, 'i_>' + it_behaves_like :string_unpack_32bit_be_signed, 'i>_' + it_behaves_like :string_unpack_32bit_be_signed, 'i_>' + end + + describe "with modifier '>' and '!'" do + it_behaves_like :string_unpack_32bit_be, 'i>!' + it_behaves_like :string_unpack_32bit_be, 'i!>' + it_behaves_like :string_unpack_32bit_be_signed, 'i>!' + it_behaves_like :string_unpack_32bit_be_signed, 'i!>' + end +end + +little_endian do + describe "String#unpack with format 'I'" do + it_behaves_like :string_unpack_basic, 'I' + it_behaves_like :string_unpack_32bit_le, 'I' + it_behaves_like :string_unpack_32bit_le_unsigned, 'I' + end + + describe "String#unpack with format 'I' with modifier '_'" do + it_behaves_like :string_unpack_32bit_le, 'I_' + it_behaves_like :string_unpack_32bit_le_unsigned, 'I_' + end + + describe "String#unpack with format 'I' with modifier '!'" do + it_behaves_like :string_unpack_32bit_le, 'I!' 
+ it_behaves_like :string_unpack_32bit_le_unsigned, 'I!' + end + + describe "String#unpack with format 'i'" do + it_behaves_like :string_unpack_basic, 'i' + it_behaves_like :string_unpack_32bit_le, 'i' + it_behaves_like :string_unpack_32bit_le_signed, 'i' + end + + describe "String#unpack with format 'i' with modifier '_'" do + it_behaves_like :string_unpack_32bit_le, 'i_' + it_behaves_like :string_unpack_32bit_le_signed, 'i_' + end + + describe "String#unpack with format 'i' with modifier '!'" do + it_behaves_like :string_unpack_32bit_le, 'i!' + it_behaves_like :string_unpack_32bit_le_signed, 'i!' + end +end + +big_endian do + describe "String#unpack with format 'I'" do + it_behaves_like :string_unpack_basic, 'I' + it_behaves_like :string_unpack_32bit_be, 'I' + it_behaves_like :string_unpack_32bit_be_unsigned, 'I' + end + + describe "String#unpack with format 'I' with modifier '_'" do + it_behaves_like :string_unpack_32bit_be, 'I_' + it_behaves_like :string_unpack_32bit_be_unsigned, 'I_' + end + + describe "String#unpack with format 'I' with modifier '!'" do + it_behaves_like :string_unpack_32bit_be, 'I!' + it_behaves_like :string_unpack_32bit_be_unsigned, 'I!' + end + + describe "String#unpack with format 'i'" do + it_behaves_like :string_unpack_basic, 'i' + it_behaves_like :string_unpack_32bit_be, 'i' + it_behaves_like :string_unpack_32bit_be_signed, 'i' + end + + describe "String#unpack with format 'i' with modifier '_'" do + it_behaves_like :string_unpack_32bit_be, 'i_' + it_behaves_like :string_unpack_32bit_be_signed, 'i_' + end + + describe "String#unpack with format 'i' with modifier '!'" do + it_behaves_like :string_unpack_32bit_be, 'i!' + it_behaves_like :string_unpack_32bit_be_signed, 'i!' 
+ end +end diff --git a/spec/ruby/core/string/unpack/j_spec.rb b/spec/ruby/core/string/unpack/j_spec.rb new file mode 100644 index 0000000000..3c2baad642 --- /dev/null +++ b/spec/ruby/core/string/unpack/j_spec.rb @@ -0,0 +1,272 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/integer' + +platform_is pointer_size: 64 do + little_endian do + describe "String#unpack with format 'J'" do + describe "with modifier '_'" do + it_behaves_like :string_unpack_64bit_le, 'J_' + it_behaves_like :string_unpack_64bit_le_unsigned, 'J_' + end + + describe "with modifier '!'" do + it_behaves_like :string_unpack_64bit_le, 'J!' + it_behaves_like :string_unpack_64bit_le_unsigned, 'J!' + end + end + + describe "String#unpack with format 'j'" do + describe "with modifier '_'" do + it_behaves_like :string_unpack_64bit_le, 'j_' + it_behaves_like :string_unpack_64bit_le_signed, 'j_' + end + + describe "with modifier '!'" do + it_behaves_like :string_unpack_64bit_le, 'j!' + it_behaves_like :string_unpack_64bit_le_signed, 'j!' + end + end + end + + big_endian do + describe "String#unpack with format 'J'" do + describe "with modifier '_'" do + it_behaves_like :string_unpack_64bit_be, 'J_' + it_behaves_like :string_unpack_64bit_be_unsigned, 'J_' + end + + describe "with modifier '!'" do + it_behaves_like :string_unpack_64bit_be, 'J!' + it_behaves_like :string_unpack_64bit_be_unsigned, 'J!' + end + end + + describe "String#unpack with format 'j'" do + describe "with modifier '_'" do + it_behaves_like :string_unpack_64bit_be, 'j_' + it_behaves_like :string_unpack_64bit_be_signed, 'j_' + end + + describe "with modifier '!'" do + it_behaves_like :string_unpack_64bit_be, 'j!' + it_behaves_like :string_unpack_64bit_be_signed, 'j!' 
+ end + end + end + + describe "String#unpack with format 'J'" do + describe "with modifier '<'" do + it_behaves_like :string_unpack_64bit_le, 'J<' + it_behaves_like :string_unpack_64bit_le_unsigned, 'J<' + end + + describe "with modifier '>'" do + it_behaves_like :string_unpack_64bit_be, 'J>' + it_behaves_like :string_unpack_64bit_be_unsigned, 'J>' + end + + describe "with modifier '<' and '_'" do + it_behaves_like :string_unpack_64bit_le, 'J<_' + it_behaves_like :string_unpack_64bit_le, 'J_<' + it_behaves_like :string_unpack_64bit_le_unsigned, 'J<_' + it_behaves_like :string_unpack_64bit_le_unsigned, 'J_<' + end + + describe "with modifier '<' and '!'" do + it_behaves_like :string_unpack_64bit_le, 'J<!' + it_behaves_like :string_unpack_64bit_le, 'J!<' + it_behaves_like :string_unpack_64bit_le_unsigned, 'J<!' + it_behaves_like :string_unpack_64bit_le_unsigned, 'J!<' + end + + describe "with modifier '>' and '_'" do + it_behaves_like :string_unpack_64bit_be, 'J>_' + it_behaves_like :string_unpack_64bit_be, 'J_>' + it_behaves_like :string_unpack_64bit_be_unsigned, 'J>_' + it_behaves_like :string_unpack_64bit_be_unsigned, 'J_>' + end + + describe "with modifier '>' and '!'" do + it_behaves_like :string_unpack_64bit_be, 'J>!' + it_behaves_like :string_unpack_64bit_be, 'J!>' + it_behaves_like :string_unpack_64bit_be_unsigned, 'J>!' 
+ it_behaves_like :string_unpack_64bit_be_unsigned, 'J!>' + end + end + + describe "String#unpack with format 'j'" do + describe "with modifier '<'" do + it_behaves_like :string_unpack_64bit_le, 'j<' + it_behaves_like :string_unpack_64bit_le_signed, 'j<' + end + + describe "with modifier '>'" do + it_behaves_like :string_unpack_64bit_be, 'j>' + it_behaves_like :string_unpack_64bit_be_signed, 'j>' + end + + describe "with modifier '<' and '_'" do + it_behaves_like :string_unpack_64bit_le, 'j<_' + it_behaves_like :string_unpack_64bit_le, 'j_<' + it_behaves_like :string_unpack_64bit_le_signed, 'j<_' + it_behaves_like :string_unpack_64bit_le_signed, 'j_<' + end + + describe "with modifier '<' and '!'" do + it_behaves_like :string_unpack_64bit_le, 'j<!' + it_behaves_like :string_unpack_64bit_le, 'j!<' + it_behaves_like :string_unpack_64bit_le_signed, 'j<!' + it_behaves_like :string_unpack_64bit_le_signed, 'j!<' + end + + describe "with modifier '>' and '_'" do + it_behaves_like :string_unpack_64bit_be, 'j>_' + it_behaves_like :string_unpack_64bit_be, 'j_>' + it_behaves_like :string_unpack_64bit_be_signed, 'j>_' + it_behaves_like :string_unpack_64bit_be_signed, 'j_>' + end + + describe "with modifier '>' and '!'" do + it_behaves_like :string_unpack_64bit_be, 'j>!' + it_behaves_like :string_unpack_64bit_be, 'j!>' + it_behaves_like :string_unpack_64bit_be_signed, 'j>!' + it_behaves_like :string_unpack_64bit_be_signed, 'j!>' + end + end +end + +platform_is pointer_size: 32 do + little_endian do + describe "String#unpack with format 'J'" do + describe "with modifier '_'" do + it_behaves_like :string_unpack_32bit_le, 'J_' + it_behaves_like :string_unpack_32bit_le_unsigned, 'J_' + end + + describe "with modifier '!'" do + it_behaves_like :string_unpack_32bit_le, 'J!' + it_behaves_like :string_unpack_32bit_le_unsigned, 'J!' 
+ end + end + + describe "String#unpack with format 'j'" do + describe "with modifier '_'" do + it_behaves_like :string_unpack_32bit_le, 'j_' + it_behaves_like :string_unpack_32bit_le_signed, 'j_' + end + + describe "with modifier '!'" do + it_behaves_like :string_unpack_32bit_le, 'j!' + it_behaves_like :string_unpack_32bit_le_signed, 'j!' + end + end + end + + big_endian do + describe "String#unpack with format 'J'" do + describe "with modifier '_'" do + it_behaves_like :string_unpack_32bit_be, 'J_' + it_behaves_like :string_unpack_32bit_be_unsigned, 'J_' + end + + describe "with modifier '!'" do + it_behaves_like :string_unpack_32bit_be, 'J!' + it_behaves_like :string_unpack_32bit_be_unsigned, 'J!' + end + end + + describe "String#unpack with format 'j'" do + describe "with modifier '_'" do + it_behaves_like :string_unpack_32bit_be, 'j_' + it_behaves_like :string_unpack_32bit_be_signed, 'j_' + end + + describe "with modifier '!'" do + it_behaves_like :string_unpack_32bit_be, 'j!' + it_behaves_like :string_unpack_32bit_be_signed, 'j!' + end + end + end + + describe "String#unpack with format 'J'" do + describe "with modifier '<'" do + it_behaves_like :string_unpack_32bit_le, 'J<' + it_behaves_like :string_unpack_32bit_le_unsigned, 'J<' + end + + describe "with modifier '>'" do + it_behaves_like :string_unpack_32bit_be, 'J>' + it_behaves_like :string_unpack_32bit_be_unsigned, 'J>' + end + + describe "with modifier '<' and '_'" do + it_behaves_like :string_unpack_32bit_le, 'J<_' + it_behaves_like :string_unpack_32bit_le, 'J_<' + it_behaves_like :string_unpack_32bit_le_unsigned, 'J<_' + it_behaves_like :string_unpack_32bit_le_unsigned, 'J_<' + end + + describe "with modifier '<' and '!'" do + it_behaves_like :string_unpack_32bit_le, 'J<!' + it_behaves_like :string_unpack_32bit_le, 'J!<' + it_behaves_like :string_unpack_32bit_le_unsigned, 'J<!' 
+ it_behaves_like :string_unpack_32bit_le_unsigned, 'J!<' + end + + describe "with modifier '>' and '_'" do + it_behaves_like :string_unpack_32bit_be, 'J>_' + it_behaves_like :string_unpack_32bit_be, 'J_>' + it_behaves_like :string_unpack_32bit_be_unsigned, 'J>_' + it_behaves_like :string_unpack_32bit_be_unsigned, 'J_>' + end + + describe "with modifier '>' and '!'" do + it_behaves_like :string_unpack_32bit_be, 'J>!' + it_behaves_like :string_unpack_32bit_be, 'J!>' + it_behaves_like :string_unpack_32bit_be_unsigned, 'J>!' + it_behaves_like :string_unpack_32bit_be_unsigned, 'J!>' + end + end + + describe "String#unpack with format 'j'" do + describe "with modifier '<'" do + it_behaves_like :string_unpack_32bit_le, 'j<' + it_behaves_like :string_unpack_32bit_le_signed, 'j<' + end + + describe "with modifier '>'" do + it_behaves_like :string_unpack_32bit_be, 'j>' + it_behaves_like :string_unpack_32bit_be_signed, 'j>' + end + + describe "with modifier '<' and '_'" do + it_behaves_like :string_unpack_32bit_le, 'j<_' + it_behaves_like :string_unpack_32bit_le, 'j_<' + it_behaves_like :string_unpack_32bit_le_signed, 'j<_' + it_behaves_like :string_unpack_32bit_le_signed, 'j_<' + end + + describe "with modifier '<' and '!'" do + it_behaves_like :string_unpack_32bit_le, 'j<!' + it_behaves_like :string_unpack_32bit_le, 'j!<' + it_behaves_like :string_unpack_32bit_le_signed, 'j<!' + it_behaves_like :string_unpack_32bit_le_signed, 'j!<' + end + + describe "with modifier '>' and '_'" do + it_behaves_like :string_unpack_32bit_be, 'j>_' + it_behaves_like :string_unpack_32bit_be, 'j_>' + it_behaves_like :string_unpack_32bit_be_signed, 'j>_' + it_behaves_like :string_unpack_32bit_be_signed, 'j_>' + end + + describe "with modifier '>' and '!'" do + it_behaves_like :string_unpack_32bit_be, 'j>!' + it_behaves_like :string_unpack_32bit_be, 'j!>' + it_behaves_like :string_unpack_32bit_be_signed, 'j>!' 
+ it_behaves_like :string_unpack_32bit_be_signed, 'j!>' + end + end +end diff --git a/spec/ruby/core/string/unpack/l_spec.rb b/spec/ruby/core/string/unpack/l_spec.rb new file mode 100644 index 0000000000..0adb567eca --- /dev/null +++ b/spec/ruby/core/string/unpack/l_spec.rb @@ -0,0 +1,265 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/integer' + +describe "String#unpack with format 'L'" do + describe "with modifier '<'" do + it_behaves_like :string_unpack_32bit_le, 'L<' + it_behaves_like :string_unpack_32bit_le_unsigned, 'L<' + end + + describe "with modifier '>'" do + it_behaves_like :string_unpack_32bit_be, 'L>' + it_behaves_like :string_unpack_32bit_be_unsigned, 'L>' + end + + platform_is c_long_size: 32 do + describe "with modifier '<' and '_'" do + it_behaves_like :string_unpack_32bit_le, 'L<_' + it_behaves_like :string_unpack_32bit_le, 'L_<' + it_behaves_like :string_unpack_32bit_le_unsigned, 'L<_' + it_behaves_like :string_unpack_32bit_le_unsigned, 'L_<' + end + + describe "with modifier '<' and '!'" do + it_behaves_like :string_unpack_32bit_le, 'L<!' + it_behaves_like :string_unpack_32bit_le, 'L!<' + it_behaves_like :string_unpack_32bit_le_unsigned, 'L<!' + it_behaves_like :string_unpack_32bit_le_unsigned, 'L!<' + end + + describe "with modifier '>' and '_'" do + it_behaves_like :string_unpack_32bit_be, 'L>_' + it_behaves_like :string_unpack_32bit_be, 'L_>' + it_behaves_like :string_unpack_32bit_be_unsigned, 'L>_' + it_behaves_like :string_unpack_32bit_be_unsigned, 'L_>' + end + + describe "with modifier '>' and '!'" do + it_behaves_like :string_unpack_32bit_be, 'L>!' + it_behaves_like :string_unpack_32bit_be, 'L!>' + it_behaves_like :string_unpack_32bit_be_unsigned, 'L>!' 
+ it_behaves_like :string_unpack_32bit_be_unsigned, 'L!>' + end + end + + platform_is c_long_size: 64 do + describe "with modifier '<' and '_'" do + it_behaves_like :string_unpack_64bit_le, 'L<_' + it_behaves_like :string_unpack_64bit_le, 'L_<' + it_behaves_like :string_unpack_64bit_le_unsigned, 'L<_' + it_behaves_like :string_unpack_64bit_le_unsigned, 'L_<' + end + + describe "with modifier '<' and '!'" do + it_behaves_like :string_unpack_64bit_le, 'L<!' + it_behaves_like :string_unpack_64bit_le, 'L!<' + it_behaves_like :string_unpack_64bit_le_unsigned, 'L<!' + it_behaves_like :string_unpack_64bit_le_unsigned, 'L!<' + end + + describe "with modifier '>' and '_'" do + it_behaves_like :string_unpack_64bit_be, 'L>_' + it_behaves_like :string_unpack_64bit_be, 'L_>' + it_behaves_like :string_unpack_64bit_be_unsigned, 'L>_' + it_behaves_like :string_unpack_64bit_be_unsigned, 'L_>' + end + + describe "with modifier '>' and '!'" do + it_behaves_like :string_unpack_64bit_be, 'L>!' + it_behaves_like :string_unpack_64bit_be, 'L!>' + it_behaves_like :string_unpack_64bit_be_unsigned, 'L>!' + it_behaves_like :string_unpack_64bit_be_unsigned, 'L!>' + end + end +end + +describe "String#unpack with format 'l'" do + describe "with modifier '<'" do + it_behaves_like :string_unpack_32bit_le, 'l<' + it_behaves_like :string_unpack_32bit_le_signed, 'l<' + end + + describe "with modifier '>'" do + it_behaves_like :string_unpack_32bit_be, 'l>' + it_behaves_like :string_unpack_32bit_be_signed, 'l>' + end + + platform_is c_long_size: 32 do + describe "with modifier '<' and '_'" do + it_behaves_like :string_unpack_32bit_le, 'l<_' + it_behaves_like :string_unpack_32bit_le, 'l_<' + it_behaves_like :string_unpack_32bit_le_signed, 'l<_' + it_behaves_like :string_unpack_32bit_le_signed, 'l_<' + end + + describe "with modifier '<' and '!'" do + it_behaves_like :string_unpack_32bit_le, 'l<!' + it_behaves_like :string_unpack_32bit_le, 'l!<' + it_behaves_like :string_unpack_32bit_le_signed, 'l<!' 
+ it_behaves_like :string_unpack_32bit_le_signed, 'l!<' + end + + describe "with modifier '>' and '_'" do + it_behaves_like :string_unpack_32bit_be, 'l>_' + it_behaves_like :string_unpack_32bit_be, 'l_>' + it_behaves_like :string_unpack_32bit_be_signed, 'l>_' + it_behaves_like :string_unpack_32bit_be_signed, 'l_>' + end + + describe "with modifier '>' and '!'" do + it_behaves_like :string_unpack_32bit_be, 'l>!' + it_behaves_like :string_unpack_32bit_be, 'l!>' + it_behaves_like :string_unpack_32bit_be_signed, 'l>!' + it_behaves_like :string_unpack_32bit_be_signed, 'l!>' + end + end + + platform_is c_long_size: 64 do + describe "with modifier '<' and '_'" do + it_behaves_like :string_unpack_64bit_le, 'l<_' + it_behaves_like :string_unpack_64bit_le, 'l_<' + it_behaves_like :string_unpack_64bit_le_signed, 'l<_' + it_behaves_like :string_unpack_64bit_le_signed, 'l_<' + end + + describe "with modifier '<' and '!'" do + it_behaves_like :string_unpack_64bit_le, 'l<!' + it_behaves_like :string_unpack_64bit_le, 'l!<' + it_behaves_like :string_unpack_64bit_le_signed, 'l<!' + it_behaves_like :string_unpack_64bit_le_signed, 'l!<' + end + + describe "with modifier '>' and '_'" do + it_behaves_like :string_unpack_64bit_be, 'l>_' + it_behaves_like :string_unpack_64bit_be, 'l_>' + it_behaves_like :string_unpack_64bit_be_signed, 'l>_' + it_behaves_like :string_unpack_64bit_be_signed, 'l_>' + end + + describe "with modifier '>' and '!'" do + it_behaves_like :string_unpack_64bit_be, 'l>!' + it_behaves_like :string_unpack_64bit_be, 'l!>' + it_behaves_like :string_unpack_64bit_be_signed, 'l>!' 
+ it_behaves_like :string_unpack_64bit_be_signed, 'l!>' + end + end +end + +little_endian do + describe "String#unpack with format 'L'" do + it_behaves_like :string_unpack_basic, 'L' + it_behaves_like :string_unpack_32bit_le, 'L' + it_behaves_like :string_unpack_32bit_le_unsigned, 'L' + end + + describe "String#unpack with format 'l'" do + it_behaves_like :string_unpack_basic, 'l' + it_behaves_like :string_unpack_32bit_le, 'l' + it_behaves_like :string_unpack_32bit_le_signed, 'l' + end + + platform_is c_long_size: 32 do + describe "String#unpack with format 'L' with modifier '_'" do + it_behaves_like :string_unpack_32bit_le, 'L_' + it_behaves_like :string_unpack_32bit_le_unsigned, 'L_' + end + + describe "String#unpack with format 'L' with modifier '!'" do + it_behaves_like :string_unpack_32bit_le, 'L!' + it_behaves_like :string_unpack_32bit_le_unsigned, 'L!' + end + + describe "String#unpack with format 'l' with modifier '_'" do + it_behaves_like :string_unpack_32bit_le, 'l_' + it_behaves_like :string_unpack_32bit_le_signed, 'l' + end + + describe "String#unpack with format 'l' with modifier '!'" do + it_behaves_like :string_unpack_32bit_le, 'l!' + it_behaves_like :string_unpack_32bit_le_signed, 'l' + end + end + + platform_is c_long_size: 64 do + describe "String#unpack with format 'L' with modifier '_'" do + it_behaves_like :string_unpack_64bit_le, 'L_' + it_behaves_like :string_unpack_64bit_le_unsigned, 'L_' + end + + describe "String#unpack with format 'L' with modifier '!'" do + it_behaves_like :string_unpack_64bit_le, 'L!' + it_behaves_like :string_unpack_64bit_le_unsigned, 'L!' + end + + describe "String#unpack with format 'l' with modifier '_'" do + it_behaves_like :string_unpack_64bit_le, 'l_' + it_behaves_like :string_unpack_64bit_le_signed, 'l_' + end + + describe "String#unpack with format 'l' with modifier '!'" do + it_behaves_like :string_unpack_64bit_le, 'l!' + it_behaves_like :string_unpack_64bit_le_signed, 'l!' 
+ end + end +end + +big_endian do + describe "String#unpack with format 'L'" do + it_behaves_like :string_unpack_basic, 'L' + it_behaves_like :string_unpack_32bit_be, 'L' + it_behaves_like :string_unpack_32bit_be_unsigned, 'L' + end + + describe "String#unpack with format 'l'" do + it_behaves_like :string_unpack_basic, 'l' + it_behaves_like :string_unpack_32bit_be, 'l' + it_behaves_like :string_unpack_32bit_be_signed, 'l' + end + + platform_is c_long_size: 32 do + describe "String#unpack with format 'L' with modifier '_'" do + it_behaves_like :string_unpack_32bit_be, 'L_' + it_behaves_like :string_unpack_32bit_be_unsigned, 'L_' + end + + describe "String#unpack with format 'L' with modifier '!'" do + it_behaves_like :string_unpack_32bit_be, 'L!' + it_behaves_like :string_unpack_32bit_be_unsigned, 'L!' + end + + describe "String#unpack with format 'l' with modifier '_'" do + it_behaves_like :string_unpack_32bit_be, 'l_' + it_behaves_like :string_unpack_32bit_be_signed, 'l' + end + + describe "String#unpack with format 'l' with modifier '!'" do + it_behaves_like :string_unpack_32bit_be, 'l!' + it_behaves_like :string_unpack_32bit_be_signed, 'l' + end + end + + platform_is c_long_size: 64 do + describe "String#unpack with format 'L' with modifier '_'" do + it_behaves_like :string_unpack_64bit_be, 'L_' + it_behaves_like :string_unpack_64bit_be_unsigned, 'L_' + end + + describe "String#unpack with format 'L' with modifier '!'" do + it_behaves_like :string_unpack_64bit_be, 'L!' + it_behaves_like :string_unpack_64bit_be_unsigned, 'L!' + end + + describe "String#unpack with format 'l' with modifier '_'" do + it_behaves_like :string_unpack_64bit_be, 'l_' + it_behaves_like :string_unpack_64bit_be_signed, 'l_' + end + + describe "String#unpack with format 'l' with modifier '!'" do + it_behaves_like :string_unpack_64bit_be, 'l!' + it_behaves_like :string_unpack_64bit_be_signed, 'l!' 
+ end + end + +end diff --git a/spec/ruby/core/string/unpack/m_spec.rb b/spec/ruby/core/string/unpack/m_spec.rb new file mode 100644 index 0000000000..357987a053 --- /dev/null +++ b/spec/ruby/core/string/unpack/m_spec.rb @@ -0,0 +1,192 @@ +# encoding: binary +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/taint' + +describe "String#unpack with format 'M'" do + it_behaves_like :string_unpack_basic, 'M' + it_behaves_like :string_unpack_no_platform, 'M' + it_behaves_like :string_unpack_taint, 'M' + + it "decodes an empty string" do + "".unpack("M").should == [""] + end + + it "decodes the complete string ignoring newlines when given a single directive" do + "a=\nb=\nc=\n".unpack("M").should == ["abc"] + end + + it "appends empty string to the array for directives exceeding the input size" do + "a=\nb=\nc=\n".unpack("MMM").should == ["abc", "", ""] + end + + it "ignores the count or '*' modifier and decodes the entire string" do + [ ["a=\nb=\nc=\n", "M238", ["abc"]], + ["a=\nb=\nc=\n", "M*", ["abc"]] + ].should be_computed_by(:unpack) + end + + it "decodes the '=' character" do + "=3D=\n".unpack("M").should == ["="] + end + + it "decodes an embedded space character" do + "a b=\n".unpack("M").should == ["a b"] + end + + it "decodes a space at the end of the pre-encoded string" do + "a =\n".unpack("M").should == ["a "] + end + + it "decodes an embedded tab character" do + "a\tb=\n".unpack("M").should == ["a\tb"] + end + + it "decodes a tab character at the end of the pre-encoded string" do + "a\t=\n".unpack("M").should == ["a\t"] + end + + it "decodes an embedded newline" do + "a\nb=\n".unpack("M").should == ["a\nb"] + end + + it "decodes pre-encoded byte values 33..60" do + [ ["!\"\#$%&'()*+,-./=\n", ["!\"\#$%&'()*+,-./"]], + ["0123456789=\n", ["0123456789"]], + [":;<=\n", [":;<"]] + ].should be_computed_by(:unpack, "M") + end + + it "decodes pre-encoded byte values 62..126" do + 
[ [">?@=\n", [">?@"]], + ["ABCDEFGHIJKLMNOPQRSTUVWXYZ=\n", ["ABCDEFGHIJKLMNOPQRSTUVWXYZ"]], + ["[\\]^_`=\n", ["[\\]^_`"]], + ["abcdefghijklmnopqrstuvwxyz=\n", ["abcdefghijklmnopqrstuvwxyz"]], + ["{|}~=\n", ["{|}~"]] + ].should be_computed_by(:unpack, "M") + end + + it "decodes pre-encoded byte values 0..31 except tab and newline" do + [ ["=00=01=02=03=04=05=06=\n", ["\x00\x01\x02\x03\x04\x05\x06"]], + ["=07=08=0B=0C=0D=\n", ["\a\b\v\f\r"]], + ["=0E=0F=10=11=12=13=14=\n", ["\x0e\x0f\x10\x11\x12\x13\x14"]], + ["=15=16=17=18=19=1A=\n", ["\x15\x16\x17\x18\x19\x1a"]], + ["=1B=\n", ["\e"]], + ["=1C=1D=1E=1F=\n", ["\x1c\x1d\x1e\x1f"]] + ].should be_computed_by(:unpack, "M") + end + + it "decodes pre-encoded byte values 127..255" do + [ ["=7F=80=81=82=83=84=85=86=\n", ["\x7f\x80\x81\x82\x83\x84\x85\x86"]], + ["=87=88=89=8A=8B=8C=8D=8E=\n", ["\x87\x88\x89\x8a\x8b\x8c\x8d\x8e"]], + ["=8F=90=91=92=93=94=95=96=\n", ["\x8f\x90\x91\x92\x93\x94\x95\x96"]], + ["=97=98=99=9A=9B=9C=9D=9E=\n", ["\x97\x98\x99\x9a\x9b\x9c\x9d\x9e"]], + ["=9F=A0=A1=A2=A3=A4=A5=A6=\n", ["\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6"]], + ["=A7=A8=A9=AA=AB=AC=AD=AE=\n", ["\xa7\xa8\xa9\xaa\xab\xac\xad\xae"]], + ["=AF=B0=B1=B2=B3=B4=B5=B6=\n", ["\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6"]], + ["=B7=B8=B9=BA=BB=BC=BD=BE=\n", ["\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe"]], + ["=BF=C0=C1=C2=C3=C4=C5=C6=\n", ["\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6"]], + ["=C7=C8=C9=CA=CB=CC=CD=CE=\n", ["\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce"]], + ["=CF=D0=D1=D2=D3=D4=D5=D6=\n", ["\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6"]], + ["=D7=D8=D9=DA=DB=DC=DD=DE=\n", ["\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde"]], + ["=DF=E0=E1=E2=E3=E4=E5=E6=\n", ["\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6"]], + ["=E7=E8=E9=EA=EB=EC=ED=EE=\n", ["\xe7\xe8\xe9\xea\xeb\xec\xed\xee"]], + ["=EF=F0=F1=F2=F3=F4=F5=F6=\n", ["\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6"]], + ["=F7=F8=F9=FA=FB=FC=FD=FE=\n", ["\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe"]], + ["=FF=\n", ["\xff"]] + ].should be_computed_by(:unpack, "M") + end + + it "unpacks 
incomplete escape sequences as literal characters" do + "foo=".unpack("M").should == ["foo="] + "foo=4".unpack("M").should == ["foo=4"] + end +end + +describe "String#unpack with format 'm'" do + it_behaves_like :string_unpack_basic, 'm' + it_behaves_like :string_unpack_no_platform, 'm' + it_behaves_like :string_unpack_taint, 'm' + + it "decodes an empty string" do + "".unpack("m").should == [""] + end + + it "decodes the complete string ignoring newlines when given a single directive" do + "YWJj\nREVG\n".unpack("m").should == ["abcDEF"] + end + + it "ignores the count or '*' modifier and decodes the entire string" do + [ ["YWJj\nREVG\n", "m238", ["abcDEF"]], + ["YWJj\nREVG\n", "m*", ["abcDEF"]] + ].should be_computed_by(:unpack) + end + + it "appends empty string to the array for directives exceeding the input size" do + "YWJj\nREVG\n".unpack("mmm").should == ["abcDEF", "", ""] + end + + it "decodes all pre-encoded ascii byte values" do + [ ["AAECAwQFBg==\n", ["\x00\x01\x02\x03\x04\x05\x06"]], + ["BwgJCgsMDQ==\n", ["\a\b\t\n\v\f\r"]], + ["Dg8QERITFBUW\n", ["\x0E\x0F\x10\x11\x12\x13\x14\x15\x16"]], + ["FxgZGhscHR4f\n", ["\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f"]], + ["ISIjJCUmJygpKissLS4v\n", ["!\"\#$%&'()*+,-./"]], + ["MDEyMzQ1Njc4OQ==\n", ["0123456789"]], + ["Ojs8PT4/QA==\n", [":;<=>?@"]], + ["QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVo=\n", ["ABCDEFGHIJKLMNOPQRSTUVWXYZ"]], + ["W1xdXl9g\n", ["[\\]^_`"]], + ["YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo=\n", ["abcdefghijklmnopqrstuvwxyz"]], + ["e3x9fg==\n", ["{|}~"]], + ["f8KAwoHCgsKD\n", ["\x7f\xc2\x80\xc2\x81\xc2\x82\xc2\x83"]], + ["woTChcKGwofC\n", ["\xc2\x84\xc2\x85\xc2\x86\xc2\x87\xc2"]], + ["iMKJworCi8KM\n", ["\x88\xc2\x89\xc2\x8a\xc2\x8b\xc2\x8c"]], + ["wo3CjsKPwpDC\n", ["\xc2\x8d\xc2\x8e\xc2\x8f\xc2\x90\xc2"]], + ["kcKSwpPClMKV\n", ["\x91\xc2\x92\xc2\x93\xc2\x94\xc2\x95"]], + ["wpbCl8KYwpnC\n", ["\xc2\x96\xc2\x97\xc2\x98\xc2\x99\xc2"]], + ["msKbwpzCncKe\n", ["\x9a\xc2\x9b\xc2\x9c\xc2\x9d\xc2\x9e"]], + ["wp/CoMKhwqLC\n", 
["\xc2\x9f\xc2\xa0\xc2\xa1\xc2\xa2\xc2"]], + ["o8KkwqXCpsKn\n", ["\xa3\xc2\xa4\xc2\xa5\xc2\xa6\xc2\xa7"]], + ["wqjCqcKqwqvC\n", ["\xc2\xa8\xc2\xa9\xc2\xaa\xc2\xab\xc2"]], + ["rMKtwq7Cr8Kw\n", ["\xac\xc2\xad\xc2\xae\xc2\xaf\xc2\xb0"]], + ["wrHCssKzwrTC\n", ["\xc2\xb1\xc2\xb2\xc2\xb3\xc2\xb4\xc2"]], + ["tcK2wrfCuMK5\n", ["\xb5\xc2\xb6\xc2\xb7\xc2\xb8\xc2\xb9"]], + ["wrrCu8K8wr3C\n", ["\xc2\xba\xc2\xbb\xc2\xbc\xc2\xbd\xc2"]], + ["vsK/w4DDgcOC\n", ["\xbe\xc2\xbf\xc3\x80\xc3\x81\xc3\x82"]], + ["w4PDhMOFw4bD\n", ["\xc3\x83\xc3\x84\xc3\x85\xc3\x86\xc3"]], + ["h8OIw4nDisOL\n", ["\x87\xc3\x88\xc3\x89\xc3\x8a\xc3\x8b"]], + ["w4zDjcOOw4/D\n", ["\xc3\x8c\xc3\x8d\xc3\x8e\xc3\x8f\xc3"]], + ["kMORw5LDk8OU\n", ["\x90\xc3\x91\xc3\x92\xc3\x93\xc3\x94"]], + ["w5XDlsOXw5jD\n", ["\xc3\x95\xc3\x96\xc3\x97\xc3\x98\xc3"]], + ["mcOaw5vDnMOd\n", ["\x99\xc3\x9a\xc3\x9b\xc3\x9c\xc3\x9d"]], + ["w57Dn8Ogw6HD\n", ["\xc3\x9e\xc3\x9f\xc3\xa0\xc3\xa1\xc3"]], + ["osOjw6TDpcOm\n", ["\xa2\xc3\xa3\xc3\xa4\xc3\xa5\xc3\xa6"]], + ["w6fDqMOpw6rD\n", ["\xc3\xa7\xc3\xa8\xc3\xa9\xc3\xaa\xc3"]], + ["q8Osw63DrsOv\n", ["\xab\xc3\xac\xc3\xad\xc3\xae\xc3\xaf"]], + ["w7DDscOyw7PD\n", ["\xc3\xb0\xc3\xb1\xc3\xb2\xc3\xb3\xc3"]], + ["tMO1w7bDt8O4\n", ["\xb4\xc3\xb5\xc3\xb6\xc3\xb7\xc3\xb8"]], + ["w7nDusO7w7zD\n", ["\xc3\xb9\xc3\xba\xc3\xbb\xc3\xbc\xc3"]], + ["vcO+w78=\n", ["\xbd\xc3\xbe\xc3\xbf"]] + ].should be_computed_by(:unpack, "m") + end + + it "produces binary strings" do + "".unpack("m").first.encoding.should == Encoding::BINARY + "Ojs8PT4/QA==\n".unpack("m").first.encoding.should == Encoding::BINARY + end + + it "does not raise an error for an invalid base64 character" do + "dGV%zdA==".unpack("m").should == ["test"] + end + + describe "when given count 0" do + it "decodes base64" do + "dGVzdA==".unpack("m0").should == ["test"] + end + + it "raises an ArgumentError for an invalid base64 character" do + -> { "dGV%zdA==".unpack("m0") }.should raise_error(ArgumentError) + end + end +end diff --git 
a/spec/ruby/core/string/unpack/n_spec.rb b/spec/ruby/core/string/unpack/n_spec.rb new file mode 100644 index 0000000000..09173f4fcb --- /dev/null +++ b/spec/ruby/core/string/unpack/n_spec.rb @@ -0,0 +1,18 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/integer' + +describe "String#unpack with format 'N'" do + it_behaves_like :string_unpack_basic, 'N' + it_behaves_like :string_unpack_32bit_be, 'N' + it_behaves_like :string_unpack_32bit_be_unsigned, 'N' + it_behaves_like :string_unpack_no_platform, 'N' +end + +describe "String#unpack with format 'n'" do + it_behaves_like :string_unpack_basic, 'n' + it_behaves_like :string_unpack_16bit_be, 'n' + it_behaves_like :string_unpack_16bit_be_unsigned, 'n' + it_behaves_like :string_unpack_no_platform, 'n' +end diff --git a/spec/ruby/core/string/unpack/p_spec.rb b/spec/ruby/core/string/unpack/p_spec.rb new file mode 100644 index 0000000000..cd48c0523d --- /dev/null +++ b/spec/ruby/core/string/unpack/p_spec.rb @@ -0,0 +1,44 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/taint' + +describe "String#unpack with format 'P'" do + it_behaves_like :string_unpack_basic, 'P' + it_behaves_like :string_unpack_taint, 'P' + + it "round-trips a string through pack and unpack" do + ["hello"].pack("P").unpack("P5").should == ["hello"] + end + + it "cannot unpack a string except from the same object that created it, or a duplicate of it" do + packed = ["hello"].pack("P") + packed.unpack("P5").should == ["hello"] + packed.dup.unpack("P5").should == ["hello"] + -> { packed.to_sym.to_s.unpack("P5") }.should raise_error(ArgumentError, /no associated pointer/) + end + + it "reads as many characters as specified" do + ["hello"].pack("P").unpack("P1").should == ["h"] + end + + it "reads only as far as a NUL character" do + ["hello"].pack("P").unpack("P10").should 
== ["hello"] + end +end + +describe "String#unpack with format 'p'" do + it_behaves_like :string_unpack_basic, 'p' + it_behaves_like :string_unpack_taint, 'p' + + it "round-trips a string through pack and unpack" do + ["hello"].pack("p").unpack("p").should == ["hello"] + end + + it "cannot unpack a string except from the same object that created it, or a duplicate of it" do + packed = ["hello"].pack("p") + packed.unpack("p").should == ["hello"] + packed.dup.unpack("p").should == ["hello"] + -> { packed.to_sym.to_s.unpack("p") }.should raise_error(ArgumentError, /no associated pointer/) + end +end diff --git a/spec/ruby/core/string/unpack/percent_spec.rb b/spec/ruby/core/string/unpack/percent_spec.rb new file mode 100644 index 0000000000..0e27663195 --- /dev/null +++ b/spec/ruby/core/string/unpack/percent_spec.rb @@ -0,0 +1,7 @@ +require_relative '../../../spec_helper' + +describe "String#unpack with format '%'" do + it "raises an Argument Error" do + -> { "abc".unpack("%") }.should raise_error(ArgumentError) + end +end diff --git a/spec/ruby/core/string/unpack/q_spec.rb b/spec/ruby/core/string/unpack/q_spec.rb new file mode 100644 index 0000000000..2f667d6c4d --- /dev/null +++ b/spec/ruby/core/string/unpack/q_spec.rb @@ -0,0 +1,64 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/integer' + +describe "String#unpack with format 'Q'" do + describe "with modifier '<'" do + it_behaves_like :string_unpack_64bit_le, 'Q<' + it_behaves_like :string_unpack_64bit_le_unsigned, 'Q<' + end + + describe "with modifier '>'" do + it_behaves_like :string_unpack_64bit_be, 'Q>' + it_behaves_like :string_unpack_64bit_be_unsigned, 'Q>' + end +end + +describe "String#unpack with format 'q'" do + describe "with modifier '<'" do + it_behaves_like :string_unpack_64bit_le, 'q<' + it_behaves_like :string_unpack_64bit_le_signed, 'q<' + end + + describe "with modifier '>'" do + it_behaves_like 
:string_unpack_64bit_be, 'q>' + it_behaves_like :string_unpack_64bit_be_signed, 'q>' + end +end + +describe "String#unpack with format 'Q'" do + it_behaves_like :string_unpack_basic, 'Q' +end + +describe "String#unpack with format 'q'" do + it_behaves_like :string_unpack_basic, 'q' +end + +little_endian do + describe "String#unpack with format 'Q'" do + it_behaves_like :string_unpack_64bit_le, 'Q' + it_behaves_like :string_unpack_64bit_le_extra, 'Q' + it_behaves_like :string_unpack_64bit_le_unsigned, 'Q' + end + + describe "String#unpack with format 'q'" do + it_behaves_like :string_unpack_64bit_le, 'q' + it_behaves_like :string_unpack_64bit_le_extra, 'q' + it_behaves_like :string_unpack_64bit_le_signed, 'q' + end +end + +big_endian do + describe "String#unpack with format 'Q'" do + it_behaves_like :string_unpack_64bit_be, 'Q' + it_behaves_like :string_unpack_64bit_be_extra, 'Q' + it_behaves_like :string_unpack_64bit_be_unsigned, 'Q' + end + + describe "String#unpack with format 'q'" do + it_behaves_like :string_unpack_64bit_be, 'q' + it_behaves_like :string_unpack_64bit_be_extra, 'q' + it_behaves_like :string_unpack_64bit_be_signed, 'q' + end +end diff --git a/spec/ruby/core/string/unpack/s_spec.rb b/spec/ruby/core/string/unpack/s_spec.rb new file mode 100644 index 0000000000..d331fd720e --- /dev/null +++ b/spec/ruby/core/string/unpack/s_spec.rb @@ -0,0 +1,152 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/integer' + +describe "String#unpack with format 'S'" do + describe "with modifier '<'" do + it_behaves_like :string_unpack_16bit_le, 'S<' + it_behaves_like :string_unpack_16bit_le_unsigned, 'S<' + end + + describe "with modifier '<' and '_'" do + it_behaves_like :string_unpack_16bit_le, 'S<_' + it_behaves_like :string_unpack_16bit_le, 'S_<' + it_behaves_like :string_unpack_16bit_le_unsigned, 'S_<' + it_behaves_like :string_unpack_16bit_le_unsigned, 'S<_' + end + + 
describe "with modifier '<' and '!'" do + it_behaves_like :string_unpack_16bit_le, 'S<!' + it_behaves_like :string_unpack_16bit_le, 'S!<' + it_behaves_like :string_unpack_16bit_le_unsigned, 'S!<' + it_behaves_like :string_unpack_16bit_le_unsigned, 'S<!' + end + + describe "with modifier '>'" do + it_behaves_like :string_unpack_16bit_be, 'S>' + it_behaves_like :string_unpack_16bit_be_unsigned, 'S>' + end + + describe "with modifier '>' and '_'" do + it_behaves_like :string_unpack_16bit_be, 'S>_' + it_behaves_like :string_unpack_16bit_be, 'S_>' + it_behaves_like :string_unpack_16bit_be_unsigned, 'S>_' + it_behaves_like :string_unpack_16bit_be_unsigned, 'S_>' + end + + describe "with modifier '>' and '!'" do + it_behaves_like :string_unpack_16bit_be, 'S>!' + it_behaves_like :string_unpack_16bit_be, 'S!>' + it_behaves_like :string_unpack_16bit_be_unsigned, 'S>!' + it_behaves_like :string_unpack_16bit_be_unsigned, 'S!>' + end +end + +describe "String#unpack with format 's'" do + describe "with modifier '<'" do + it_behaves_like :string_unpack_16bit_le, 's<' + it_behaves_like :string_unpack_16bit_le_signed, 's<' + end + + describe "with modifier '<' and '_'" do + it_behaves_like :string_unpack_16bit_le, 's<_' + it_behaves_like :string_unpack_16bit_le, 's_<' + it_behaves_like :string_unpack_16bit_le_signed, 's<_' + it_behaves_like :string_unpack_16bit_le_signed, 's_<' + end + + describe "with modifier '<' and '!'" do + it_behaves_like :string_unpack_16bit_le, 's<!' + it_behaves_like :string_unpack_16bit_le, 's!<' + it_behaves_like :string_unpack_16bit_le_signed, 's<!' 
+ it_behaves_like :string_unpack_16bit_le_signed, 's!<' + end + + describe "with modifier '>'" do + it_behaves_like :string_unpack_16bit_be, 's>' + it_behaves_like :string_unpack_16bit_be_signed, 's>' + end + + describe "with modifier '>' and '_'" do + it_behaves_like :string_unpack_16bit_be, 's>_' + it_behaves_like :string_unpack_16bit_be, 's_>' + it_behaves_like :string_unpack_16bit_be_signed, 's>_' + it_behaves_like :string_unpack_16bit_be_signed, 's_>' + end + + describe "with modifier '>' and '!'" do + it_behaves_like :string_unpack_16bit_be, 's>!' + it_behaves_like :string_unpack_16bit_be, 's!>' + it_behaves_like :string_unpack_16bit_be_signed, 's>!' + it_behaves_like :string_unpack_16bit_be_signed, 's!>' + end +end + +little_endian do + describe "String#unpack with format 'S'" do + it_behaves_like :string_unpack_basic, 'S' + it_behaves_like :string_unpack_16bit_le, 'S' + it_behaves_like :string_unpack_16bit_le_unsigned, 'S' + end + + describe "String#unpack with format 'S' with modifier '_'" do + it_behaves_like :string_unpack_16bit_le, 'S_' + it_behaves_like :string_unpack_16bit_le_unsigned, 'S_' + end + + describe "String#unpack with format 'S' with modifier '!'" do + it_behaves_like :string_unpack_16bit_le, 'S!' + it_behaves_like :string_unpack_16bit_le_unsigned, 'S!' + end + + describe "String#unpack with format 's'" do + it_behaves_like :string_unpack_basic, 's' + it_behaves_like :string_unpack_16bit_le, 's' + it_behaves_like :string_unpack_16bit_le_signed, 's' + end + + describe "String#unpack with format 's' with modifier '_'" do + it_behaves_like :string_unpack_16bit_le, 's_' + it_behaves_like :string_unpack_16bit_le_signed, 's_' + end + + describe "String#unpack with format 's' with modifier '!'" do + it_behaves_like :string_unpack_16bit_le, 's!' + it_behaves_like :string_unpack_16bit_le_signed, 's!' 
+ end +end + +big_endian do + describe "String#unpack with format 'S'" do + it_behaves_like :string_unpack_basic, 'S' + it_behaves_like :string_unpack_16bit_be, 'S' + it_behaves_like :string_unpack_16bit_be_unsigned, 'S' + end + + describe "String#unpack with format 'S' with modifier '_'" do + it_behaves_like :string_unpack_16bit_be, 'S_' + it_behaves_like :string_unpack_16bit_be_unsigned, 'S_' + end + + describe "String#unpack with format 'S' with modifier '!'" do + it_behaves_like :string_unpack_16bit_be, 'S!' + it_behaves_like :string_unpack_16bit_be_unsigned, 'S!' + end + + describe "String#unpack with format 's'" do + it_behaves_like :string_unpack_basic, 's' + it_behaves_like :string_unpack_16bit_be, 's' + it_behaves_like :string_unpack_16bit_be_signed, 's' + end + + describe "String#unpack with format 's' with modifier '_'" do + it_behaves_like :string_unpack_16bit_be, 's_' + it_behaves_like :string_unpack_16bit_be_signed, 's_' + end + + describe "String#unpack with format 's' with modifier '!'" do + it_behaves_like :string_unpack_16bit_be, 's!' + it_behaves_like :string_unpack_16bit_be_signed, 's!' 
+ end +end diff --git a/spec/ruby/core/string/unpack/shared/basic.rb b/spec/ruby/core/string/unpack/shared/basic.rb new file mode 100644 index 0000000000..734630bda0 --- /dev/null +++ b/spec/ruby/core/string/unpack/shared/basic.rb @@ -0,0 +1,37 @@ +describe :string_unpack_basic, shared: true do + it "ignores whitespace in the format string" do + "abc".unpack("a \t\n\v\f\r"+unpack_format).should be_an_instance_of(Array) + end + + it "calls #to_str to coerce the directives string" do + d = mock("unpack directive") + d.should_receive(:to_str).and_return("a"+unpack_format) + "abc".unpack(d).should be_an_instance_of(Array) + end + + ruby_version_is ""..."3.3" do + it "warns about using an unknown directive" do + -> { "abcdefgh".unpack("a R" + unpack_format) }.should complain(/unknown unpack directive 'R' in 'a R#{unpack_format}'/) + -> { "abcdefgh".unpack("a 0" + unpack_format) }.should complain(/unknown unpack directive '0' in 'a 0#{unpack_format}'/) + -> { "abcdefgh".unpack("a :" + unpack_format) }.should complain(/unknown unpack directive ':' in 'a :#{unpack_format}'/) + end + end + + ruby_version_is "3.3" do + it "raises ArgumentError when a directive is unknown" do + -> { "abcdefgh".unpack("a K" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive 'K' in 'a K#{unpack_format}'") + -> { "abcdefgh".unpack("a 0" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive '0' in 'a 0#{unpack_format}'") + -> { "abcdefgh".unpack("a :" + unpack_format) }.should raise_error(ArgumentError, "unknown unpack directive ':' in 'a :#{unpack_format}'") + end + end +end + +describe :string_unpack_no_platform, shared: true do + it "raises an ArgumentError when the format modifier is '_'" do + -> { "abcdefgh".unpack(unpack_format("_")) }.should raise_error(ArgumentError) + end + + it "raises an ArgumentError when the format modifier is '!'" do + -> { "abcdefgh".unpack(unpack_format("!")) }.should raise_error(ArgumentError) + end +end diff 
--git a/spec/ruby/core/string/unpack/shared/float.rb b/spec/ruby/core/string/unpack/shared/float.rb new file mode 100644 index 0000000000..b31c2c8bdc --- /dev/null +++ b/spec/ruby/core/string/unpack/shared/float.rb @@ -0,0 +1,319 @@ +# encoding: binary + +describe :string_unpack_float_le, shared: true do + it "decodes one float for a single format character" do + "\x8f\xc2\xb5?".unpack(unpack_format).should == [1.4199999570846558] + end + + it "decodes a negative float" do + "\xcd\xcc\x08\xc2".unpack(unpack_format).should == [-34.200000762939453] + end + + it "decodes two floats for two format characters" do + array = "\x9a\x999@33\xb3?".unpack(unpack_format(nil, 2)) + array.should == [2.9000000953674316, 1.399999976158142] + end + + it "decodes the number of floats requested by the count modifier" do + array = "\x9a\x999@33\xb3?33\x03A".unpack(unpack_format(3)) + array.should == [2.9000000953674316, 1.399999976158142, 8.199999809265137] + end + + it "decodes the remaining floats when passed the '*' modifier" do + array = "\x9a\x999@33\xb3?33\x03A".unpack(unpack_format("*")) + array.should == [2.9000000953674316, 1.399999976158142, 8.199999809265137] + end + + it "decodes the remaining floats when passed the '*' modifier after another directive" do + array = "\x9a\x99\xa9@33\x13A".unpack(unpack_format()+unpack_format('*')) + array.should == [5.300000190734863, 9.199999809265137] + end + + it "does not decode a float when fewer bytes than a float remain and the '*' modifier is passed" do + [ ["\xff", []], + ["\xff\x00", []], + ["\xff\x00\xff", []] + ].should be_computed_by(:unpack, unpack_format("*")) + end + + it "adds nil for each element requested beyond the end of the String" do + [ ["abc", [nil, nil, nil]], + ["\x8f\xc2\xb5?abc", [1.4199999570846558, nil, nil]], + ["\x9a\x999@33\xb3?abc", [2.9000000953674316, 1.399999976158142, nil]] + ].should be_computed_by(:unpack, unpack_format(3)) + end + + it "decodes positive Infinity" do + 
"\x00\x00\x80\x7f".unpack(unpack_format).should == [infinity_value] + end + + it "decodes negative Infinity" do + "\x00\x00\x80\xff".unpack(unpack_format).should == [-infinity_value] + end + + it "decodes NaN" do + # mumble mumble NaN mumble https://bugs.ruby-lang.org/issues/5884 + [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + array = "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2)) + array.should == [2.9000000953674316, 1.399999976158142] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x9a\x999@33\xb3?".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + array = "\x9a\x999@33\xb3?".unpack(unpack_format(' ', 2)) + array.should == [2.9000000953674316, 1.399999976158142] + end +end + +describe :string_unpack_float_be, shared: true do + it "decodes one float for a single format character" do + "?\xb5\xc2\x8f".unpack(unpack_format).should == [1.4199999570846558] + end + + it "decodes a negative float" do + "\xc2\x08\xcc\xcd".unpack(unpack_format).should == [-34.200000762939453] + end + + it "decodes two floats for two format characters" do + array = "@9\x99\x9a?\xb333".unpack(unpack_format(nil, 2)) + array.should == [2.9000000953674316, 1.399999976158142] + end + + it "decodes the number of floats requested by the count modifier" do + array = "@9\x99\x9a?\xb333A\x0333".unpack(unpack_format(3)) + array.should == [2.9000000953674316, 1.399999976158142, 8.199999809265137] + end + + it "decodes the remaining floats when passed the '*' modifier" do + array = "@9\x99\x9a?\xb333A\x0333".unpack(unpack_format("*")) + array.should == [2.9000000953674316, 1.399999976158142, 8.199999809265137] + end + + it "decodes the remaining floats when passed the '*' 
modifier after another directive" do + array = "@\xa9\x99\x9aA\x1333".unpack(unpack_format()+unpack_format('*')) + array.should == [5.300000190734863, 9.199999809265137] + end + + it "does not decode a float when fewer bytes than a float remain and the '*' modifier is passed" do + [ ["\xff", []], + ["\xff\x00", []], + ["\xff\x00\xff", []] + ].should be_computed_by(:unpack, unpack_format("*")) + end + + it "adds nil for each element requested beyond the end of the String" do + [ ["abc", [nil, nil, nil]], + ["?\xb5\xc2\x8fabc", [1.4199999570846558, nil, nil]], + ["@9\x99\x9a?\xb333abc", [2.9000000953674316, 1.399999976158142, nil]] + ].should be_computed_by(:unpack, unpack_format(3)) + end + + it "decodes positive Infinity" do + "\x7f\x80\x00\x00".unpack(unpack_format).should == [infinity_value] + end + + it "decodes negative Infinity" do + "\xff\x80\x00\x00".unpack(unpack_format).should == [-infinity_value] + end + + it "decodes NaN" do + # mumble mumble NaN mumble https://bugs.ruby-lang.org/issues/5884 + [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + array = "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2)) + array.should == [2.9000000953674316, 1.399999976158142] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "@9\x99\x9a?\xb333".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + array = "@9\x99\x9a?\xb333".unpack(unpack_format(' ', 2)) + array.should == [2.9000000953674316, 1.399999976158142] + end +end + +describe :string_unpack_double_le, shared: true do + it "decodes one double for a single format character" do + "\xb8\x1e\x85\xebQ\xb8\xf6?".unpack(unpack_format).should == [1.42] + end + + it "decodes a negative double" do + 
"\x9a\x99\x99\x99\x99\x19A\xc0".unpack(unpack_format).should == [-34.2] + end + + it "decodes two doubles for two format characters" do + "333333\x07@ffffff\xf6?".unpack(unpack_format(nil, 2)).should == [2.9, 1.4] + end + + it "decodes the number of doubles requested by the count modifier" do + array = "333333\x07@ffffff\xf6?ffffff\x20@".unpack(unpack_format(3)) + array.should == [2.9, 1.4, 8.2] + end + + it "decodes the remaining doubles when passed the '*' modifier" do + array = "333333\x07@ffffff\xf6?ffffff\x20@".unpack(unpack_format("*")) + array.should == [2.9, 1.4, 8.2] + end + + it "decodes the remaining doubles when passed the '*' modifier after another directive" do + array = "333333\x15@ffffff\x22@".unpack(unpack_format()+unpack_format('*')) + array.should == [5.3, 9.2] + end + + it "does not decode a double when fewer bytes than a double remain and the '*' modifier is passed" do + [ ["\xff", []], + ["\xff\x00", []], + ["\xff\x00\xff", []], + ["\xff\x00\xff\x00", []], + ["\xff\x00\xff\x00\xff", []], + ["\xff\x00\xff\x00\xff\x00", []], + ["\xff\x00\xff\x00\xff\x00\xff", []] + ].should be_computed_by(:unpack, unpack_format("*")) + end + + it "adds nil for each element requested beyond the end of the String" do + [ ["\xff\x00\xff\x00\xff\x00\xff", [nil, nil, nil]], + ["\xb8\x1e\x85\xebQ\xb8\xf6?abc", [1.42, nil, nil]], + ["333333\x07@ffffff\xf6?abcd", [2.9, 1.4, nil]] + ].should be_computed_by(:unpack, unpack_format(3)) + end + + it "decodes positive Infinity" do + "\x00\x00\x00\x00\x00\x00\xf0\x7f".unpack(unpack_format).should == [infinity_value] + end + + it "decodes negative Infinity" do + "\x00\x00\x00\x00\x00\x00\xf0\xff".unpack(unpack_format).should == [-infinity_value] + end + + it "decodes NaN" do + # mumble mumble NaN mumble https://bugs.ruby-lang.org/issues/5884 + [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning 
do + "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)).should == [2.9, 1.4] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "333333\x07@ffffff\xf6?".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + "333333\x07@ffffff\xf6?".unpack(unpack_format(' ', 2)).should == [2.9, 1.4] + end +end + +describe :string_unpack_double_be, shared: true do + it "decodes one double for a single format character" do + "?\xf6\xb8Q\xeb\x85\x1e\xb8".unpack(unpack_format).should == [1.42] + end + + it "decodes a negative double" do + "\xc0A\x19\x99\x99\x99\x99\x9a".unpack(unpack_format).should == [-34.2] + end + + it "decodes two doubles for two format characters" do + "@\x07333333?\xf6ffffff".unpack(unpack_format(nil, 2)).should == [2.9, 1.4] + end + + it "decodes the number of doubles requested by the count modifier" do + array = "@\x07333333?\xf6ffffff@\x20ffffff".unpack(unpack_format(3)) + array.should == [2.9, 1.4, 8.2] + end + + it "decodes the remaining doubles when passed the '*' modifier" do + array = "@\x07333333?\xf6ffffff@\x20ffffff".unpack(unpack_format("*")) + array.should == [2.9, 1.4, 8.2] + end + + it "decodes the remaining doubles when passed the '*' modifier after another directive" do + array = "@\x15333333@\x22ffffff".unpack(unpack_format()+unpack_format('*')) + array.should == [5.3, 9.2] + end + + it "does not decode a double when fewer bytes than a double remain and the '*' modifier is passed" do + [ ["\xff", []], + ["\xff\x00", []], + ["\xff\x00\xff", []], + ["\xff\x00\xff\x00", []], + ["\xff\x00\xff\x00\xff", []], + ["\xff\x00\xff\x00\xff\x00", []], + ["\xff\x00\xff\x00\xff\x00\xff", []] + ].should be_computed_by(:unpack, unpack_format("*")) + end + + it "adds nil for each element requested beyond the end of the String" do + [ ["abcdefg", [nil, nil, nil]], + 
["?\xf6\xb8Q\xeb\x85\x1e\xb8abc", [1.42, nil, nil]], + ["@\x07333333?\xf6ffffffabcd", [2.9, 1.4, nil]] + ].should be_computed_by(:unpack, unpack_format(3)) + end + + it "decodes positive Infinity" do + "\x7f\xf0\x00\x00\x00\x00\x00\x00".unpack(unpack_format).should == [infinity_value] + end + + it "decodes negative Infinity" do + "\xff\xf0\x00\x00\x00\x00\x00\x00".unpack(unpack_format).should == [-infinity_value] + end + + it "decodes NaN" do + # mumble mumble NaN mumble https://bugs.ruby-lang.org/issues/5884 + [nan_value].pack(unpack_format).unpack(unpack_format).first.nan?.should be_true + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)).should == [2.9, 1.4] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "@\x07333333?\xf6ffffff".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + "@\x07333333?\xf6ffffff".unpack(unpack_format(' ', 2)).should == [2.9, 1.4] + end +end diff --git a/spec/ruby/core/string/unpack/shared/integer.rb b/spec/ruby/core/string/unpack/shared/integer.rb new file mode 100644 index 0000000000..d3934753ba --- /dev/null +++ b/spec/ruby/core/string/unpack/shared/integer.rb @@ -0,0 +1,411 @@ +# encoding: binary + +describe :string_unpack_16bit_le, shared: true do + it "decodes one short for a single format character" do + "ab".unpack(unpack_format).should == [25185] + end + + it "decodes two shorts for two format characters" do + "abcd".unpack(unpack_format(nil, 2)).should == [25185, 25699] + end + + it "decodes the number of shorts requested by the count modifier" do + "abcdef".unpack(unpack_format(3)).should == [25185, 25699, 26213] + end + + it "decodes the remaining shorts when passed the '*' modifier" do + "abcd".unpack(unpack_format('*')).should == 
[25185, 25699] + end + + it "decodes the remaining shorts when passed the '*' modifier after another directive" do + "abcd".unpack(unpack_format()+unpack_format('*')).should == [25185, 25699] + end + + it "does not decode a short when fewer bytes than a short remain and the '*' modifier is passed" do + "\xff".unpack(unpack_format('*')).should == [] + end + + it "adds nil for each element requested beyond the end of the String" do + [ ["", [nil, nil, nil]], + ["abc", [25185, nil, nil]], + ["abcd", [25185, 25699, nil]] + ].should be_computed_by(:unpack, unpack_format(3)) + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "abcd".unpack(unpack_format("\000", 2)).should == [25185, 25699] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abcd".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + "abcd".unpack(unpack_format(' ', 2)).should == [25185, 25699] + end +end + +describe :string_unpack_16bit_le_signed, shared: true do + it "decodes a short with most significant bit set as a negative number" do + "\x00\xff".unpack(unpack_format()).should == [-256] + end +end + +describe :string_unpack_16bit_le_unsigned, shared: true do + it "decodes a short with most significant bit set as a positive number" do + "\x00\xff".unpack(unpack_format()).should == [65280] + end +end + +describe :string_unpack_16bit_be, shared: true do + it "decodes one short for a single format character" do + "ba".unpack(unpack_format).should == [25185] + end + + it "decodes two shorts for two format characters" do + "badc".unpack(unpack_format(nil, 2)).should == [25185, 25699] + end + + it "decodes the number of shorts requested by the count modifier" do + "badcfe".unpack(unpack_format(3)).should == [25185, 25699, 26213] + end + + it "decodes the remaining shorts when 
passed the '*' modifier" do + "badc".unpack(unpack_format('*')).should == [25185, 25699] + end + + it "decodes the remaining shorts when passed the '*' modifier after another directive" do + "badc".unpack(unpack_format()+unpack_format('*')).should == [25185, 25699] + end + + it "does not decode a short when fewer bytes than a short remain and the '*' modifier is passed" do + "\xff".unpack(unpack_format('*')).should == [] + end + + it "adds nil for each element requested beyond the end of the String" do + [ ["", [nil, nil, nil]], + ["bac", [25185, nil, nil]], + ["badc", [25185, 25699, nil]] + ].should be_computed_by(:unpack, unpack_format(3)) + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "badc".unpack(unpack_format("\000", 2)).should == [25185, 25699] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "badc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + "badc".unpack(unpack_format(' ', 2)).should == [25185, 25699] + end +end + +describe :string_unpack_16bit_be_signed, shared: true do + it "decodes a short with most significant bit set as a negative number" do + "\xff\x00".unpack(unpack_format()).should == [-256] + end +end + +describe :string_unpack_16bit_be_unsigned, shared: true do + it "decodes a short with most significant bit set as a positive number" do + "\xff\x00".unpack(unpack_format()).should == [65280] + end +end + +describe :string_unpack_32bit_le, shared: true do + it "decodes one int for a single format character" do + "abcd".unpack(unpack_format).should == [1684234849] + end + + it "decodes two ints for two format characters" do + "abghefcd".unpack(unpack_format(nil, 2)).should == [1751605857, 1684235877] + end + + it "decodes the number of ints requested by the count modifier" do + 
"abcedfgh".unpack(unpack_format(2)).should == [1701012065, 1751606884] + end + + it "decodes the remaining ints when passed the '*' modifier" do + "acbdegfh".unpack(unpack_format('*')).should == [1684169569, 1751541605] + end + + it "decodes the remaining ints when passed the '*' modifier after another directive" do + "abcdefgh".unpack(unpack_format()+unpack_format('*')).should == [1684234849, 1751606885] + end + + it "does not decode an int when fewer bytes than an int remain and the '*' modifier is passed" do + "abc".unpack(unpack_format('*')).should == [] + end + + it "adds nil for each element requested beyond the end of the String" do + [ ["", [nil, nil, nil]], + ["abcde", [1684234849, nil, nil]], + ["abcdefg", [1684234849, nil, nil]], + ["abcdefgh", [1684234849, 1751606885, nil]] + ].should be_computed_by(:unpack, unpack_format(3)) + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "abcdefgh".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "abcdefgh".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + "abcdefgh".unpack(unpack_format(' ', 2)).should == [1684234849, 1751606885] + end +end + +describe :string_unpack_32bit_le_signed, shared: true do + it "decodes an int with most significant bit set as a negative number" do + "\x00\xaa\x00\xff".unpack(unpack_format()).should == [-16733696] + end +end + +describe :string_unpack_32bit_le_unsigned, shared: true do + it "decodes an int with most significant bit set as a positive number" do + "\x00\xaa\x00\xff".unpack(unpack_format()).should == [4278233600] + end +end + +describe :string_unpack_32bit_be, shared: true do + it "decodes one int for a single format character" do + "dcba".unpack(unpack_format).should == 
[1684234849] + end + + it "decodes two ints for two format characters" do + "hgbadcfe".unpack(unpack_format(nil, 2)).should == [1751605857, 1684235877] + end + + it "decodes the number of ints requested by the count modifier" do + "ecbahgfd".unpack(unpack_format(2)).should == [1701012065, 1751606884] + end + + it "decodes the remaining ints when passed the '*' modifier" do + "dbcahfge".unpack(unpack_format('*')).should == [1684169569, 1751541605] + end + + it "decodes the remaining ints when passed the '*' modifier after another directive" do + "dcbahgfe".unpack(unpack_format()+unpack_format('*')).should == [1684234849, 1751606885] + end + + it "does not decode an int when fewer bytes than an int remain and the '*' modifier is passed" do + "abc".unpack(unpack_format('*')).should == [] + end + + it "adds nil for each element requested beyond the end of the String" do + [ ["", [nil, nil, nil]], + ["dcbae", [1684234849, nil, nil]], + ["dcbaefg", [1684234849, nil, nil]], + ["dcbahgfe", [1684234849, 1751606885, nil]] + ].should be_computed_by(:unpack, unpack_format(3)) + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "dcbahgfe".unpack(unpack_format("\000", 2)).should == [1684234849, 1751606885] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "dcbahgfe".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + "dcbahgfe".unpack(unpack_format(' ', 2)).should == [1684234849, 1751606885] + end +end + +describe :string_unpack_32bit_be_signed, shared: true do + it "decodes an int with most significant bit set as a negative number" do + "\xff\x00\xaa\x00".unpack(unpack_format()).should == [-16733696] + end +end + +describe :string_unpack_32bit_be_unsigned, shared: true do + it "decodes an int with most significant bit set as a positive number" 
do + "\xff\x00\xaa\x00".unpack(unpack_format()).should == [4278233600] + end +end + +describe :string_unpack_64bit_le, shared: true do + it "decodes one long for a single format character" do + "abcdefgh".unpack(unpack_format).should == [7523094288207667809] + end + + it "decodes two longs for two format characters" do + array = "abghefcdghefabcd".unpack(unpack_format(nil, 2)) + array.should == [7233738012216484449, 7233733596956420199] + end + + it "decodes the number of longs requested by the count modifier" do + array = "abcedfghefcdghef".unpack(unpack_format(2)) + array.should == [7523094283929477729, 7378418357791581797] + end + + it "decodes the remaining longs when passed the '*' modifier" do + array = "acbdegfhdegfhacb".unpack(unpack_format('*')) + array.should == [7522813912742519649, 7089617339433837924] + end + + it "decodes the remaining longs when passed the '*' modifier after another directive" do + array = "bcahfgedhfgedbca".unpack(unpack_format()+unpack_format('*')) + array.should == [7234302065976107874, 7017560827710891624] + end + + it "does not decode a long when fewer bytes than a long remain and the '*' modifier is passed" do + "abc".unpack(unpack_format('*')).should == [] + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + array = "abcdefghabghefcd".unpack(unpack_format("\000", 2)) + array.should == [7523094288207667809, 7233738012216484449] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "badc".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + array = "abcdefghabghefcd".unpack(unpack_format(' ', 2)) + array.should == [7523094288207667809, 7233738012216484449] + end +end + +describe :string_unpack_64bit_le_extra, shared: true do + it "adds nil for each element requested beyond the end of the String" do + [ 
["", [nil, nil, nil]], + ["abcdefgh", [7523094288207667809, nil, nil]], + ["abcdefghcdefab", [7523094288207667809, nil, nil]], + ["abcdefghcdefabde", [7523094288207667809, 7306072665971057763, nil]] + ].should be_computed_by(:unpack, unpack_format(3)) + end +end + +describe :string_unpack_64bit_le_signed, shared: true do + it "decodes a long with most significant bit set as a negative number" do + "\x00\xcc\x00\xbb\x00\xaa\x00\xff".unpack(unpack_format()).should == [-71870673923814400] + end +end + +describe :string_unpack_64bit_le_unsigned, shared: true do + it "decodes a long with most significant bit set as a positive number" do + "\x00\xcc\x00\xbb\x00\xaa\x00\xff".unpack(unpack_format()).should == [18374873399785737216] + end +end + +describe :string_unpack_64bit_be, shared: true do + it "decodes one long for a single format character" do + "hgfedcba".unpack(unpack_format).should == [7523094288207667809] + end + + it "decodes two longs for two format characters" do + array = "dcfehgbadcbafehg".unpack(unpack_format(nil, 2)) + array.should == [7233738012216484449, 7233733596956420199] + end + + it "decodes the number of longs requested by the count modifier" do + array = "hgfdecbafehgdcfe".unpack(unpack_format(2)) + array.should == [7523094283929477729, 7378418357791581797] + end + + it "decodes the remaining longs when passed the '*' modifier" do + array = "hfgedbcabcahfged".unpack(unpack_format('*')) + array.should == [7522813912742519649, 7089617339433837924] + end + + it "decodes the remaining longs when passed the '*' modifier after another directive" do + array = "degfhacbacbdegfh".unpack(unpack_format()+unpack_format('*')) + array.should == [7234302065976107874, 7017560827710891624] + end + + it "does not decode a long when fewer bytes than a long remain and the '*' modifier is passed" do + "abc".unpack(unpack_format('*')).should == [] + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + array = 
"hgfedcbadcfehgba".unpack(unpack_format("\000", 2)) + array.should == [7523094288207667809, 7233738012216484449] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "hgfedcbadcfehgba".unpack(unpack_format("\000", 2)) + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + array = "hgfedcbadcfehgba".unpack(unpack_format(' ', 2)) + array.should == [7523094288207667809, 7233738012216484449] + end +end + +describe :string_unpack_64bit_be_extra, shared: true do + it "adds nil for each element requested beyond the end of the String" do + [ ["", [nil, nil, nil]], + ["hgfedcba", [7523094288207667809, nil, nil]], + ["hgfedcbacdefab", [7523094288207667809, nil, nil]], + ["hgfedcbaedbafedc", [7523094288207667809, 7306072665971057763, nil]] + ].should be_computed_by(:unpack, unpack_format(3)) + end +end + +describe :string_unpack_64bit_be_signed, shared: true do + it "decodes a long with most significant bit set as a negative number" do + "\xff\x00\xaa\x00\xbb\x00\xcc\x00".unpack(unpack_format()).should == [-71870673923814400] + end +end + +describe :string_unpack_64bit_be_unsigned, shared: true do + it "decodes a long with most significant bit set as a positive number" do + "\xff\x00\xaa\x00\xbb\x00\xcc\x00".unpack(unpack_format()).should == [18374873399785737216] + end +end diff --git a/spec/ruby/core/string/unpack/shared/string.rb b/spec/ruby/core/string/unpack/shared/string.rb new file mode 100644 index 0000000000..9d85eedf26 --- /dev/null +++ b/spec/ruby/core/string/unpack/shared/string.rb @@ -0,0 +1,51 @@ +describe :string_unpack_string, shared: true do + it "returns an empty string if the input is empty" do + "".unpack(unpack_format).should == [""] + end + + it "returns empty strings for repeated formats if the input is empty" do + "".unpack(unpack_format(nil, 3)).should == ["", "", ""] + end + + it "returns an empty string and does 
not decode any bytes when the count modifier is zero" do + "abc".unpack(unpack_format(0)+unpack_format).should == ["", "a"] + end + + it "implicitly has a count of one when no count is specified" do + "abc".unpack(unpack_format).should == ["a"] + end + + it "decodes the number of bytes specified by the count modifier" do + "abc".unpack(unpack_format(3)).should == ["abc"] + end + + it "decodes the number of bytes specified by the count modifier including whitespace bytes" do + [ ["a bc", ["a b", "c"]], + ["a\fbc", ["a\fb", "c"]], + ["a\nbc", ["a\nb", "c"]], + ["a\rbc", ["a\rb", "c"]], + ["a\tbc", ["a\tb", "c"]], + ["a\vbc", ["a\vb", "c"]] + ].should be_computed_by(:unpack, unpack_format(3)+unpack_format) + end + + it "decodes past whitespace bytes when passed the '*' modifier" do + [ ["a b c", ["a b c"]], + ["a\fb c", ["a\fb c"]], + ["a\nb c", ["a\nb c"]], + ["a\rb c", ["a\rb c"]], + ["a\tb c", ["a\tb c"]], + ["a\vb c", ["a\vb c"]], + ].should be_computed_by(:unpack, unpack_format("*")) + end +end + +describe :string_unpack_Aa, shared: true do + it "decodes the number of bytes specified by the count modifier including NULL bytes" do + "a\x00bc".unpack(unpack_format(3)+unpack_format).should == ["a\x00b", "c"] + end + + it "decodes past NULL bytes when passed the '*' modifier" do + "a\x00b c".unpack(unpack_format("*")).should == ["a\x00b c"] + end +end diff --git a/spec/ruby/core/string/unpack/shared/taint.rb b/spec/ruby/core/string/unpack/shared/taint.rb new file mode 100644 index 0000000000..79c7251f01 --- /dev/null +++ b/spec/ruby/core/string/unpack/shared/taint.rb @@ -0,0 +1,2 @@ +describe :string_unpack_taint, shared: true do +end diff --git a/spec/ruby/core/string/unpack/shared/unicode.rb b/spec/ruby/core/string/unpack/shared/unicode.rb new file mode 100644 index 0000000000..9fe07f53ae --- /dev/null +++ b/spec/ruby/core/string/unpack/shared/unicode.rb @@ -0,0 +1,72 @@ +# -*- encoding: utf-8 -*- + +describe :string_unpack_unicode, shared: true do + it "decodes 
Unicode codepoints as ASCII values" do + [ ["\x00", [0]], + ["\x01", [1]], + ["\x08", [8]], + ["\x0f", [15]], + ["\x18", [24]], + ["\x1f", [31]], + ["\x7f", [127]], + ["\xc2\x80", [128]], + ["\xc2\x81", [129]], + ["\xc3\xbf", [255]] + ].should be_computed_by(:unpack, "U") + end + + it "decodes the number of characters specified by the count modifier" do + [ ["\xc2\x80\xc2\x81\xc2\x82\xc2\x83", "U1", [0x80]], + ["\xc2\x80\xc2\x81\xc2\x82\xc2\x83", "U2", [0x80, 0x81]], + ["\xc2\x80\xc2\x81\xc2\x82\xc2\x83", "U3", [0x80, 0x81, 0x82]] + ].should be_computed_by(:unpack) + end + + it "implicitly has a count of one when no count modifier is passed" do + "\xc2\x80\xc2\x81\xc2\x82\xc2\x83".unpack("U1").should == [0x80] + end + + it "decodes all remaining characters when passed the '*' modifier" do + "\xc2\x80\xc2\x81\xc2\x82\xc2\x83".unpack("U*").should == [0x80, 0x81, 0x82, 0x83] + end + + it "decodes UTF-8 BMP codepoints" do + [ ["\xc2\x80", [0x80]], + ["\xdf\xbf", [0x7ff]], + ["\xe0\xa0\x80", [0x800]], + ["\xef\xbf\xbf", [0xffff]] + ].should be_computed_by(:unpack, "U") + end + + it "decodes UTF-8 max codepoints" do + [ ["\xf0\x90\x80\x80", [0x10000]], + ["\xf3\xbf\xbf\xbf", [0xfffff]], + ["\xf4\x80\x80\x80", [0x100000]], + ["\xf4\x8f\xbf\xbf", [0x10ffff]] + ].should be_computed_by(:unpack, "U") + end + + it "does not decode any items for directives exceeding the input string size" do + "\xc2\x80".unpack("UUUU").should == [0x80] + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "\x01\x02".unpack("U\x00U").should == [1, 2] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x02".unpack("U\x00U") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + "\x01\x02".unpack("U U").should == [1, 2] + end +end diff --git a/spec/ruby/core/string/unpack/u_spec.rb 
b/spec/ruby/core/string/unpack/u_spec.rb new file mode 100644 index 0000000000..68c8f6f11c --- /dev/null +++ b/spec/ruby/core/string/unpack/u_spec.rb @@ -0,0 +1,97 @@ +# encoding: binary +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/unicode' +require_relative 'shared/taint' + +describe "String#unpack with format 'U'" do + it_behaves_like :string_unpack_basic, 'U' + it_behaves_like :string_unpack_no_platform, 'U' + it_behaves_like :string_unpack_unicode, 'U' + it_behaves_like :string_unpack_taint, 'U' + + it "raises ArgumentError on a malformed byte sequence" do + -> { "\xE3".unpack('U') }.should raise_error(ArgumentError) + end + + it "raises ArgumentError on a malformed byte sequence and doesn't continue when used with the * modifier" do + -> { "\xE3".unpack('U*') }.should raise_error(ArgumentError) + end +end + +describe "String#unpack with format 'u'" do + it_behaves_like :string_unpack_basic, 'u' + it_behaves_like :string_unpack_no_platform, 'u' + it_behaves_like :string_unpack_taint, 'u' + + it "decodes an empty string as an empty string" do + "".unpack("u").should == [""] + end + + it "decodes into raw (ascii) string values" do + str = "".unpack("u")[0] + str.encoding.should == Encoding::BINARY + + str = "1".dup.force_encoding('UTF-8').unpack("u")[0] + str.encoding.should == Encoding::BINARY + end + + it "decodes the complete string ignoring newlines when given a single directive" do + "#86)C\n#1$5&\n".unpack("u").should == ["abcDEF"] + end + + it "appends empty string to the array for directives exceeding the input size" do + "#86)C\n#1$5&\n".unpack("uuu").should == ["abcDEF", "", ""] + end + + it "ignores the count or '*' modifier and decodes the entire string" do + [ ["#86)C\n#1$5&\n", "u238", ["abcDEF"]], + ["#86)C\n#1$5&\n", "u*", ["abcDEF"]] + ].should be_computed_by(:unpack) + end + + it "decodes all ascii characters" do + [ ["'``$\"`P0%!@``\n", 
["\x00\x01\x02\x03\x04\x05\x06"]], + ["'!P@)\"@L,#0``\n", ["\a\b\t\n\v\f\r"]], + [")\#@\\0$1(3%!46\n", ["\x0E\x0F\x10\x11\x12\x13\x14\x15\x16"]], + [")%Q@9&AL<'1X?\n", ["\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f"]], + ["/(2(C)\"4F)R@I*BLL+2XO\n", ["!\"\#$%&'()*+,-./"]], + ["*,\#$R,S0U-C<X.0``\n", ["0123456789"]], + ["'.CL\\/3X_0```\n", [":;<=>?@"]], + [":04)#1$5&1TA)2DM,34Y/4%%24U155E=865H`\n", ["ABCDEFGHIJKLMNOPQRSTUVWXYZ"]], + ["&6UQ=7E]@\n", ["[\\]^_`"]], + [":86)C9&5F9VAI:FML;6YO<'%R<W1U=G=X>7H`\n", ["abcdefghijklmnopqrstuvwxyz"]], + ["$>WQ]?@``\n", ["{|}~"]], + [")?\\*`PH'\"@L*#\n", ["\x7f\xc2\x80\xc2\x81\xc2\x82\xc2\x83"]], + [")PH3\"A<*&PH?\"\n", ["\xc2\x84\xc2\x85\xc2\x86\xc2\x87\xc2"]], + [")B,*)PHK\"B\\*,\n", ["\x88\xc2\x89\xc2\x8a\xc2\x8b\xc2\x8c"]], + [")PHW\"CL*/PI#\"\n", ["\xc2\x8d\xc2\x8e\xc2\x8f\xc2\x90\xc2"]], + [")D<*2PI/\"E,*5\n", ["\x91\xc2\x92\xc2\x93\xc2\x94\xc2\x95"]], + [")PI;\"E\\*8PIG\"\n", ["\xc2\x96\xc2\x97\xc2\x98\xc2\x99\xc2"]], + [")FL*;PIS\"G<*>\n", ["\x9a\xc2\x9b\xc2\x9c\xc2\x9d\xc2\x9e"]], + [")PI_\"H,*APJ+\"\n", ["\xc2\x9f\xc2\xa0\xc2\xa1\xc2\xa2\xc2"]], + [")H\\*DPJ7\"IL*G\n", ["\xa3\xc2\xa4\xc2\xa5\xc2\xa6\xc2\xa7"]], + [")PJC\"J<*JPJO\"\n", ["\xc2\xa8\xc2\xa9\xc2\xaa\xc2\xab\xc2"]], + [")K,*MPJ[\"K\\*P\n", ["\xac\xc2\xad\xc2\xae\xc2\xaf\xc2\xb0"]], + [")PK'\"LL*SPK3\"\n", ["\xc2\xb1\xc2\xb2\xc2\xb3\xc2\xb4\xc2"]], + [")M<*VPK?\"N,*Y\n", ["\xb5\xc2\xb6\xc2\xb7\xc2\xb8\xc2\xb9"]], + [")PKK\"N\\*\\PKW\"\n", ["\xc2\xba\xc2\xbb\xc2\xbc\xc2\xbd\xc2"]], + [")OL*_PX#\#@<.\"\n", ["\xbe\xc2\xbf\xc3\x80\xc3\x81\xc3\x82"]], + [")PX/#A,.%PX;#\n", ["\xc3\x83\xc3\x84\xc3\x85\xc3\x86\xc3"]], + [")A\\.(PXG#BL.+\n", ["\x87\xc3\x88\xc3\x89\xc3\x8a\xc3\x8b"]], + [")PXS#C<..PX_#\n", ["\xc3\x8c\xc3\x8d\xc3\x8e\xc3\x8f\xc3"]], + [")D,.1PY+#D\\.4\n", ["\x90\xc3\x91\xc3\x92\xc3\x93\xc3\x94"]], + [")PY7#EL.7PYC#\n", ["\xc3\x95\xc3\x96\xc3\x97\xc3\x98\xc3"]], + [")F<.:PYO#G,.=\n", ["\x99\xc3\x9a\xc3\x9b\xc3\x9c\xc3\x9d"]], + [")PY[#G\\.@PZ'#\n", 
["\xc3\x9e\xc3\x9f\xc3\xa0\xc3\xa1\xc3"]], + [")HL.CPZ3#I<.F\n", ["\xa2\xc3\xa3\xc3\xa4\xc3\xa5\xc3\xa6"]], + [")PZ?#J,.IPZK#\n", ["\xc3\xa7\xc3\xa8\xc3\xa9\xc3\xaa\xc3"]], + [")J\\.LPZW#KL.O\n", ["\xab\xc3\xac\xc3\xad\xc3\xae\xc3\xaf"]], + [")P[##L<.RP[/#\n", ["\xc3\xb0\xc3\xb1\xc3\xb2\xc3\xb3\xc3"]], + [")M,.UP[;#M\\.X\n", ["\xb4\xc3\xb5\xc3\xb6\xc3\xb7\xc3\xb8"]], + [")P[G#NL.[P[S#\n", ["\xc3\xb9\xc3\xba\xc3\xbb\xc3\xbc\xc3"]], + ["%O<.^P[\\`\n", ["\xbd\xc3\xbe\xc3\xbf"]] + ].should be_computed_by(:unpack, "u") + end +end diff --git a/spec/ruby/core/string/unpack/v_spec.rb b/spec/ruby/core/string/unpack/v_spec.rb new file mode 100644 index 0000000000..929e8712cb --- /dev/null +++ b/spec/ruby/core/string/unpack/v_spec.rb @@ -0,0 +1,18 @@ +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/integer' + +describe "String#unpack with format 'V'" do + it_behaves_like :string_unpack_basic, 'V' + it_behaves_like :string_unpack_32bit_le, 'V' + it_behaves_like :string_unpack_32bit_le_unsigned, 'V' + it_behaves_like :string_unpack_no_platform, 'V' +end + +describe "String#unpack with format 'v'" do + it_behaves_like :string_unpack_basic, 'v' + it_behaves_like :string_unpack_16bit_le, 'v' + it_behaves_like :string_unpack_16bit_le_unsigned, 'v' + it_behaves_like :string_unpack_no_platform, 'v' +end diff --git a/spec/ruby/core/string/unpack/w_spec.rb b/spec/ruby/core/string/unpack/w_spec.rb new file mode 100644 index 0000000000..7d3533ccae --- /dev/null +++ b/spec/ruby/core/string/unpack/w_spec.rb @@ -0,0 +1,47 @@ +# encoding: binary +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' + +describe "String#unpack with directive 'w'" do + it_behaves_like :string_unpack_basic, 'w' + it_behaves_like :string_unpack_no_platform, 'w' + + it "decodes a BER-compressed integer" do + [ ["\x00", [0]], + ["\x01", [1]], + ["\xce\x0f", [9999]], 
+ ["\x84\x80\x80\x80\x80\x80\x80\x80\x80\x00", [2**65]] + ].should be_computed_by(:unpack, "w") + end + + ruby_version_is ""..."3.3" do + it "ignores NULL bytes between directives" do + suppress_warning do + "\x01\x02\x03".unpack("w\x00w").should == [1, 2] + end + end + end + + ruby_version_is "3.3" do + it "raise ArgumentError for NULL bytes between directives" do + -> { + "\x01\x02\x03".unpack("w\x00w") + }.should raise_error(ArgumentError, /unknown unpack directive/) + end + end + + it "ignores spaces between directives" do + "\x01\x02\x03".unpack("w w").should == [1, 2] + end +end + +describe "String#unpack with directive 'w*'" do + + it "decodes BER-compressed integers" do + "\x01\x02\x03\x04".unpack("w*").should == [1, 2, 3, 4] + "\x00\xCE\x0F\x84\x80\x80\x80\x80\x80\x80\x80\x80\x00\x01\x00".unpack("w*").should == [0, 9999, 2**65, 1, 0] + "\x81\x80\x80\x80\x80\x80\x80\x80\x80\x00\x90\x80\x80\x80\x80\x80\x80\x80\x03\x01\x02".unpack("w*").should == [2**63, (2**60 + 3), 1, 2] + end + +end diff --git a/spec/ruby/core/string/unpack/x_spec.rb b/spec/ruby/core/string/unpack/x_spec.rb new file mode 100644 index 0000000000..2926ebbe0f --- /dev/null +++ b/spec/ruby/core/string/unpack/x_spec.rb @@ -0,0 +1,62 @@ +# encoding: binary +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' + +describe "String#unpack with format 'X'" do + it_behaves_like :string_unpack_basic, 'X' + it_behaves_like :string_unpack_no_platform, 'X' + + it "moves the read index back by the number of bytes specified by count" do + "\x01\x02\x03\x04".unpack("C3X2C").should == [1, 2, 3, 2] + end + + it "does not change the read index when passed a count of zero" do + "\x01\x02\x03\x04".unpack("C3X0C").should == [1, 2, 3, 4] + end + + it "implicitly has a count of one when count is not specified" do + "\x01\x02\x03\x04".unpack("C3XC").should == [1, 2, 3, 3] + end + + it "moves the read index back by the remaining bytes when passed the '*' 
modifier" do + "abcd".unpack("C3X*C").should == [97, 98, 99, 99] + end + + it "raises an ArgumentError when passed the '*' modifier if the remaining bytes exceed the bytes from the index to the start of the String" do + -> { "abcd".unpack("CX*C") }.should raise_error(ArgumentError) + end + + it "raises an ArgumentError if the count exceeds the bytes from current index to the start of the String" do + -> { "\x01\x02\x03\x04".unpack("C3X4C") }.should raise_error(ArgumentError) + end +end + +describe "String#unpack with format 'x'" do + it_behaves_like :string_unpack_basic, 'x' + it_behaves_like :string_unpack_no_platform, 'x' + + it "moves the read index forward by the number of bytes specified by count" do + "\x01\x02\x03\x04".unpack("Cx2C").should == [1, 4] + end + + it "implicitly has a count of one when count is not specified" do + "\x01\x02\x03\x04".unpack("CxC").should == [1, 3] + end + + it "does not change the read index when passed a count of zero" do + "\x01\x02\x03\x04".unpack("Cx0C").should == [1, 2] + end + + it "moves the read index to the end of the string when passed the '*' modifier" do + "\x01\x02\x03\x04".unpack("Cx*C").should == [1, nil] + end + + it "positions the read index one beyond the last readable byte in the String" do + "\x01\x02\x03\x04".unpack("C2x2C").should == [1, 2, nil] + end + + it "raises an ArgumentError if the count exceeds the size of the String" do + -> { "\x01\x02\x03\x04".unpack("C2x3C") }.should raise_error(ArgumentError) + end +end diff --git a/spec/ruby/core/string/unpack/z_spec.rb b/spec/ruby/core/string/unpack/z_spec.rb new file mode 100644 index 0000000000..1030390550 --- /dev/null +++ b/spec/ruby/core/string/unpack/z_spec.rb @@ -0,0 +1,28 @@ +# encoding: binary +require_relative '../../../spec_helper' +require_relative '../fixtures/classes' +require_relative 'shared/basic' +require_relative 'shared/string' +require_relative 'shared/taint' + +describe "String#unpack with format 'Z'" do + it_behaves_like 
:string_unpack_basic, 'Z' + it_behaves_like :string_unpack_no_platform, 'Z' + it_behaves_like :string_unpack_string, 'Z' + it_behaves_like :string_unpack_taint, 'Z' + + it "stops decoding at NULL bytes when passed the '*' modifier" do + "a\x00\x00 b \x00c".unpack('Z*Z*Z*Z*').should == ["a", "", " b ", "c"] + end + + it "decodes the number of bytes specified by the count modifier and truncates the decoded string at the first NULL byte" do + [ ["a\x00 \x00b c", ["a", " "]], + ["\x00a\x00 bc \x00", ["", "c"]] + ].should be_computed_by(:unpack, "Z5Z") + end + + it "does not advance past the null byte when given a 'Z' format specifier" do + "a\x00\x0f".unpack('Zxc').should == ['a', 15] + "a\x00\x0f".unpack('Zcc').should == ['a', 0, 15] + end +end diff --git a/spec/ruby/core/string/unpack1_spec.rb b/spec/ruby/core/string/unpack1_spec.rb new file mode 100644 index 0000000000..cfb47fe695 --- /dev/null +++ b/spec/ruby/core/string/unpack1_spec.rb @@ -0,0 +1,47 @@ +require_relative '../../spec_helper' + +describe "String#unpack1" do + it "returns the first value of #unpack" do + "ABCD".unpack1('x3C').should == "ABCD".unpack('x3C')[0] + "\u{3042 3044 3046}".unpack1("U*").should == 0x3042 + "aG9nZWZ1Z2E=".unpack1("m").should == "hogefuga" + "A".unpack1("B*").should == "01000001" + end + + it "starts unpacking from the given offset" do + "ZZABCD".unpack1('x3C', offset: 2).should == "ABCD".unpack('x3C')[0] + "ZZZZaG9nZWZ1Z2E=".unpack1("m", offset: 4).should == "hogefuga" + "ZA".unpack1("B*", offset: 1).should == "01000001" + end + + it "traits offset as a bytes offset" do + "؈".unpack("CC").should == [216, 136] + "؈".unpack1("C").should == 216 + "؈".unpack1("C", offset: 1).should == 136 + end + + it "raises an ArgumentError when the offset is negative" do + -> { "a".unpack1("C", offset: -1) }.should raise_error(ArgumentError, "offset can't be negative") + end + + it "returns nil if the offset is at the end of the string" do + "a".unpack1("C", offset: 1).should == nil + end + + it 
"raises an ArgumentError when the offset is larger than the string bytesize" do + -> { "a".unpack1("C", offset: 2) }.should raise_error(ArgumentError, "offset outside of string") + end + + context "with format 'm0'" do + # unpack1("m0") takes a special code path that calls Pack.unpackBase46Strict instead of Pack.unpack_m, + # which is why we repeat the tests for unpack("m0") here. + + it "decodes base64" do + "dGVzdA==".unpack1("m0").should == "test" + end + + it "raises an ArgumentError for an invalid base64 character" do + -> { "dGV%zdA==".unpack1("m0") }.should raise_error(ArgumentError) + end + end +end diff --git a/spec/ruby/core/string/unpack_spec.rb b/spec/ruby/core/string/unpack_spec.rb new file mode 100644 index 0000000000..a0abf8fa99 --- /dev/null +++ b/spec/ruby/core/string/unpack_spec.rb @@ -0,0 +1,32 @@ +require_relative '../../spec_helper' + +describe "String#unpack" do + it "raises a TypeError when passed nil" do + -> { "abc".unpack(nil) }.should raise_error(TypeError) + end + + it "raises a TypeError when passed an Integer" do + -> { "abc".unpack(1) }.should raise_error(TypeError) + end + + it "starts unpacking from the given offset" do + "abc".unpack("CC", offset: 1).should == [98, 99] + end + + it "traits offset as a bytes offset" do + "؈".unpack("CC").should == [216, 136] + "؈".unpack("CC", offset: 1).should == [136, nil] + end + + it "raises an ArgumentError when the offset is negative" do + -> { "a".unpack("C", offset: -1) }.should raise_error(ArgumentError, "offset can't be negative") + end + + it "returns nil if the offset is at the end of the string" do + "a".unpack("C", offset: 1).should == [nil] + end + + it "raises an ArgumentError when the offset is larger than the string" do + -> { "a".unpack("C", offset: 2) }.should raise_error(ArgumentError, "offset outside of string") + end +end diff --git a/spec/ruby/core/string/upcase_spec.rb b/spec/ruby/core/string/upcase_spec.rb new file mode 100644 index 0000000000..652de5c2ef --- /dev/null +++ 
b/spec/ruby/core/string/upcase_spec.rb @@ -0,0 +1,187 @@ +# -*- encoding: utf-8 -*- +# frozen_string_literal: false +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#upcase" do + it "returns a copy of self with all lowercase letters upcased" do + "Hello".upcase.should == "HELLO" + "hello".upcase.should == "HELLO" + end + + it "returns a String in the same encoding as self" do + "hello".encode("US-ASCII").upcase.encoding.should == Encoding::US_ASCII + end + + describe "full Unicode case mapping" do + it "works for all of Unicode with no option" do + "äöü".upcase.should == "ÄÖÜ" + end + + it "updates string metadata" do + upcased = "aßet".upcase + + upcased.should == "ASSET" + upcased.size.should == 5 + upcased.bytesize.should == 5 + upcased.ascii_only?.should be_true + end + end + + describe "ASCII-only case mapping" do + it "does not upcase non-ASCII characters" do + "aßet".upcase(:ascii).should == "AßET" + end + + it "works with substrings" do + "prefix té"[-2..-1].upcase(:ascii).should == "Té" + end + end + + describe "full Unicode case mapping adapted for Turkic languages" do + it "upcases ASCII characters according to Turkic semantics" do + "i".upcase(:turkic).should == "İ" + end + + it "allows Lithuanian as an extra option" do + "i".upcase(:turkic, :lithuanian).should == "İ" + end + + it "does not allow any other additional option" do + -> { "i".upcase(:turkic, :ascii) }.should raise_error(ArgumentError) + end + end + + describe "full Unicode case mapping adapted for Lithuanian" do + it "currently works the same as full Unicode case mapping" do + "iß".upcase(:lithuanian).should == "ISS" + end + + it "allows Turkic as an extra option (and applies Turkic semantics)" do + "iß".upcase(:lithuanian, :turkic).should == "İSS" + end + + it "does not allow any other additional option" do + -> { "iß".upcase(:lithuanian, :ascii) }.should raise_error(ArgumentError) + end + end + + it "does not allow the :fold option for 
upcasing" do + -> { "abc".upcase(:fold) }.should raise_error(ArgumentError) + end + + it "does not allow invalid options" do + -> { "abc".upcase(:invalid_option) }.should raise_error(ArgumentError) + end + + it "returns a String instance for subclasses" do + StringSpecs::MyString.new("fooBAR").upcase.should be_an_instance_of(String) + end +end + +describe "String#upcase!" do + it "modifies self in place" do + a = "HeLlO" + a.upcase!.should equal(a) + a.should == "HELLO" + end + + it "modifies self in place for non-ascii-compatible encodings" do + a = "HeLlO".encode("utf-16le") + a.upcase! + a.should == "HELLO".encode("utf-16le") + end + + describe "full Unicode case mapping" do + it "modifies self in place for all of Unicode with no option" do + a = "äöü" + a.upcase! + a.should == "ÄÖÜ" + end + + it "works for non-ascii-compatible encodings" do + a = "äöü".encode("utf-16le") + a.upcase! + a.should == "ÄÖÜ".encode("utf-16le") + end + + it "updates string metadata for self" do + upcased = "aßet" + upcased.upcase! 
+ + upcased.should == "ASSET" + upcased.size.should == 5 + upcased.bytesize.should == 5 + upcased.ascii_only?.should be_true + end + end + + describe "modifies self in place for ASCII-only case mapping" do + it "does not upcase non-ASCII characters" do + a = "aßet" + a.upcase!(:ascii) + a.should == "AßET" + end + + it "works for non-ascii-compatible encodings" do + a = "abc".encode("utf-16le") + a.upcase!(:ascii) + a.should == "ABC".encode("utf-16le") + end + end + + describe "modifies self in place for full Unicode case mapping adapted for Turkic languages" do + it "upcases ASCII characters according to Turkic semantics" do + a = "i" + a.upcase!(:turkic) + a.should == "İ" + end + + it "allows Lithuanian as an extra option" do + a = "i" + a.upcase!(:turkic, :lithuanian) + a.should == "İ" + end + + it "does not allow any other additional option" do + -> { a = "i"; a.upcase!(:turkic, :ascii) }.should raise_error(ArgumentError) + end + end + + describe "modifies self in place for full Unicode case mapping adapted for Lithuanian" do + it "currently works the same as full Unicode case mapping" do + a = "iß" + a.upcase!(:lithuanian) + a.should == "ISS" + end + + it "allows Turkic as an extra option (and applies Turkic semantics)" do + a = "iß" + a.upcase!(:lithuanian, :turkic) + a.should == "İSS" + end + + it "does not allow any other additional option" do + -> { a = "iß"; a.upcase!(:lithuanian, :ascii) }.should raise_error(ArgumentError) + end + end + + it "does not allow the :fold option for upcasing" do + -> { a = "abc"; a.upcase!(:fold) }.should raise_error(ArgumentError) + end + + it "does not allow invalid options" do + -> { a = "abc"; a.upcase!(:invalid_option) }.should raise_error(ArgumentError) + end + + it "returns nil if no modifications were made" do + a = "HELLO" + a.upcase!.should == nil + a.should == "HELLO" + end + + it "raises a FrozenError when self is frozen" do + -> { "HeLlo".freeze.upcase! 
}.should raise_error(FrozenError) + -> { "HELLO".freeze.upcase! }.should raise_error(FrozenError) + end +end diff --git a/spec/ruby/core/string/uplus_spec.rb b/spec/ruby/core/string/uplus_spec.rb new file mode 100644 index 0000000000..20767bcc01 --- /dev/null +++ b/spec/ruby/core/string/uplus_spec.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: false +require_relative '../../spec_helper' + +describe 'String#+@' do + it 'returns an unfrozen copy of a frozen String' do + input = 'foo'.freeze + output = +input + + output.should_not.frozen? + output.should == 'foo' + + output << 'bar' + output.should == 'foobar' + end + + it 'returns a mutable String itself' do + input = String.new("foo") + output = +input + + output.should.equal?(input) + + input << "bar" + output.should == "foobar" + end + + context 'if file has "frozen_string_literal: true" magic comment' do + it 'returns mutable copy of a literal' do + ruby_exe(fixture(__FILE__, "freeze_magic_comment.rb")).should == 'mutable' + end + end + + context 'if file has "frozen_string_literal: false" magic comment' do + it 'returns literal string itself' do + input = 'foo' + output = +input + + output.equal?(input).should == true + end + end + + context 'if file has no frozen_string_literal magic comment' do + ruby_version_is ''...'3.4' do + it 'returns literal string itself' do + eval(<<~RUBY).should == true + s = "foo" + s.equal?(+s) + RUBY + end + end + + ruby_version_is '3.4' do + it 'returns mutable copy of a literal' do + eval(<<~RUBY).should == false + s = "foo" + s.equal?(+s) + RUBY + end + end + end +end diff --git a/spec/ruby/core/string/upto_spec.rb b/spec/ruby/core/string/upto_spec.rb new file mode 100644 index 0000000000..8bc847d5ac --- /dev/null +++ b/spec/ruby/core/string/upto_spec.rb @@ -0,0 +1,110 @@ +require_relative '../../spec_helper' +require_relative 'fixtures/classes' + +describe "String#upto" do + it "passes successive values, starting at self and ending at other_string, to the block" do + a = [] + 
"*+".upto("*3") { |s| a << s } + a.should == ["*+", "*,", "*-", "*.", "*/", "*0", "*1", "*2", "*3"] + end + + it "calls the block once even when start equals stop" do + a = [] + "abc".upto("abc") { |s| a << s } + a.should == ["abc"] + end + + it "doesn't call block with self even if self is less than stop but stop length is less than self length" do + a = [] + "25".upto("5") { |s| a << s } + a.should == [] + end + + it "doesn't call block if stop is less than self and stop length is less than self length" do + a = [] + "25".upto("1") { |s| a << s } + a.should == [] + end + + it "doesn't call the block if self is greater than stop" do + a = [] + "5".upto("2") { |s| a << s } + a.should == [] + end + + it "stops iterating as soon as the current value's character count gets higher than stop's" do + a = [] + "96".upto("AA") { |s| a << s } + a.should == ["96", "97", "98", "99"] + end + + it "returns self" do + "abc".upto("abd") { }.should == "abc" + "5".upto("2") { |i| i }.should == "5" + end + + it "tries to convert other to string using to_str" do + other = mock('abd') + def other.to_str() "abd" end + + a = [] + "abc".upto(other) { |s| a << s } + a.should == ["abc", "abd"] + end + + it "raises a TypeError if other can't be converted to a string" do + -> { "abc".upto(123) { } }.should raise_error(TypeError) + -> { "abc".upto(mock('x')){ } }.should raise_error(TypeError) + end + + + it "does not work with symbols" do + -> { "a".upto(:c).to_a }.should raise_error(TypeError) + end + + it "returns non-alphabetic characters in the ASCII range for single letters" do + "9".upto("A").to_a.should == ["9", ":", ";", "<", "=", ">", "?", "@", "A"] + "Z".upto("a").to_a.should == ["Z", "[", "\\", "]", "^", "_", "`", "a"] + "z".upto("~").to_a.should == ["z", "{", "|", "}", "~"] + end + + it "stops before the last value if exclusive" do + a = [] + "a".upto("d", true) { |s| a << s} + a.should == ["a", "b", "c"] + end + + it "works with non-ASCII ranges" do + a = [] + 'Σ'.upto('Ω') { |s| 
a << s } + a.should == ["Σ", "Τ", "Υ", "Φ", "Χ", "Ψ", "Ω"] + end + + it "raises Encoding::CompatibilityError when incompatible characters are given" do + char1 = 'a'.dup.force_encoding("EUC-JP") + char2 = 'b'.dup.force_encoding("ISO-2022-JP") + -> { char1.upto(char2) {} }.should raise_error(Encoding::CompatibilityError, "incompatible character encodings: EUC-JP and ISO-2022-JP") + end + + describe "on sequence of numbers" do + it "calls the block as Integer#upto" do + "8".upto("11").to_a.should == 8.upto(11).map(&:to_s) + end + end + + describe "when no block is given" do + it "returns an enumerator" do + enum = "aaa".upto("baa", true) + enum.should be_an_instance_of(Enumerator) + enum.count.should == 26**2 + end + + describe "returned Enumerator" do + describe "size" do + it "should return nil" do + "a".upto("b").size.should == nil + end + end + end + end +end diff --git a/spec/ruby/core/string/valid_encoding/utf_8_spec.rb b/spec/ruby/core/string/valid_encoding/utf_8_spec.rb new file mode 100644 index 0000000000..a14c3af830 --- /dev/null +++ b/spec/ruby/core/string/valid_encoding/utf_8_spec.rb @@ -0,0 +1,214 @@ +# -*- encoding: utf-8 -*- +require_relative '../../../spec_helper' + +describe "String#valid_encoding? 
and UTF-8" do + def utf8(bytes) + bytes.pack("C*").force_encoding("UTF-8") + end + + describe "1-byte character" do + it "is valid if is in format 0xxxxxxx" do + utf8([0b00000000]).valid_encoding?.should == true + utf8([0b01111111]).valid_encoding?.should == true + end + + it "is not valid if is not in format 0xxxxxxx" do + utf8([0b10000000]).valid_encoding?.should == false + utf8([0b11111111]).valid_encoding?.should == false + end + end + + describe "2-bytes character" do + it "is valid if in format [110xxxxx 10xxxxx]" do + utf8([0b11000010, 0b10000000]).valid_encoding?.should == true + utf8([0b11000010, 0b10111111]).valid_encoding?.should == true + + utf8([0b11011111, 0b10000000]).valid_encoding?.should == true + utf8([0b11011111, 0b10111111]).valid_encoding?.should == true + end + + it "is not valid if the first byte is not in format 110xxxxx" do + utf8([0b00000010, 0b10000000]).valid_encoding?.should == false + utf8([0b00100010, 0b10000000]).valid_encoding?.should == false + utf8([0b01000010, 0b10000000]).valid_encoding?.should == false + utf8([0b01100010, 0b10000000]).valid_encoding?.should == false + utf8([0b10000010, 0b10000000]).valid_encoding?.should == false + utf8([0b10100010, 0b10000000]).valid_encoding?.should == false + utf8([0b11000010, 0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11100010, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the second byte is not in format 10xxxxxx" do + utf8([0b11000010, 0b00000000]).valid_encoding?.should == false + utf8([0b11000010, 0b01000000]).valid_encoding?.should == false + utf8([0b11000010, 0b11000000]).valid_encoding?.should == false + end + + it "is not valid if is smaller than [xxxxxx10 xx000000] (codepoints < U+007F, that are encoded with the 1-byte format)" do + utf8([0b11000000, 0b10111111]).valid_encoding?.should == false + utf8([0b11000001, 0b10111111]).valid_encoding?.should == false + end + + it "is not valid if the first byte is missing" do + bytes 
= [0b11000010, 0b10000000] + utf8(bytes[1..1]).valid_encoding?.should == false + end + + it "is not valid if the second byte is missing" do + bytes = [0b11000010, 0b10000000] + utf8(bytes[0..0]).valid_encoding?.should == false + end + end + + describe "3-bytes character" do + it "is valid if in format [1110xxxx 10xxxxxx 10xxxxxx]" do + utf8([0b11100000, 0b10100000, 0b10000000]).valid_encoding?.should == true + utf8([0b11100000, 0b10100000, 0b10111111]).valid_encoding?.should == true + utf8([0b11100000, 0b10111111, 0b10111111]).valid_encoding?.should == true + utf8([0b11101111, 0b10111111, 0b10111111]).valid_encoding?.should == true + end + + it "is not valid if the first byte is not in format 1110xxxx" do + utf8([0b00000000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b00010000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b00100000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b00110000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b01000000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b01010000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b01100000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b01110000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b10000000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b10010000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b10100000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b10110000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11000000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11010000, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10100000, 0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11110000, 0b10100000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the 
second byte is not in format 10xxxxxx" do + utf8([0b11100000, 0b00100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b01100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b11100000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the third byte is not in format 10xxxxxx" do + utf8([0b11100000, 0b10100000, 0b00000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10100000, 0b01000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10100000, 0b01000000]).valid_encoding?.should == false + end + + it "is not valid if is smaller than [xxxx0000 xx100000 xx000000] (codepoints < U+07FF that are encoded with the 2-byte format)" do + utf8([0b11100000, 0b10010000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10001000, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10000100, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10000010, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10000001, 0b10000000]).valid_encoding?.should == false + utf8([0b11100000, 0b10000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if in range [xxxx1101 xx100000 xx000000] - [xxxx1101 xx111111 xx111111] (codepoints U+D800 - U+DFFF)" do + utf8([0b11101101, 0b10100000, 0b10000000]).valid_encoding?.should == false + utf8([0b11101101, 0b10100000, 0b10000001]).valid_encoding?.should == false + utf8([0b11101101, 0b10111111, 0b10111111]).valid_encoding?.should == false + + utf8([0b11101101, 0b10011111, 0b10111111]).valid_encoding?.should == true # lower boundary - 1 + utf8([0b11101110, 0b10000000, 0b10000000]).valid_encoding?.should == true # upper boundary + 1 + end + + it "is not valid if the first byte is missing" do + bytes = [0b11100000, 0b10100000, 0b10000000] + utf8(bytes[2..3]).valid_encoding?.should == false + end + + it "is not valid if the second byte is missing" do + bytes = [0b11100000, 0b10100000, 
0b10000000] + utf8([bytes[0], bytes[2]]).valid_encoding?.should == false + end + + it "is not valid if the second and the third bytes are missing" do + bytes = [0b11100000, 0b10100000, 0b10000000] + utf8(bytes[0..0]).valid_encoding?.should == false + end + end + + describe "4-bytes character" do + it "is valid if in format [11110xxx 10xxxxxx 10xxxxxx 10xxxxxx]" do + utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true + utf8([0b11110000, 0b10010000, 0b10000000, 0b10111111]).valid_encoding?.should == true + utf8([0b11110000, 0b10010000, 0b10111111, 0b10111111]).valid_encoding?.should == true + utf8([0b11110000, 0b10111111, 0b10111111, 0b10111111]).valid_encoding?.should == true + utf8([0b11110100, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == true + end + + it "is not valid if the first byte is not in format 11110xxx" do + utf8([0b11100000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11010000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b10110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b01110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the second byte is not in format 10xxxxxx" do + utf8([0b11110000, 0b00010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b01010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11110000, 0b11010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the third byte is not in format 10xxxxxx" do + utf8([0b11110000, 0b10010000, 0b00000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b01000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b10000000, 
0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11110000, 0b10010000, 0b11000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if the forth byte is not in format 10xxxxxx" do + utf8([0b11110000, 0b10010000, 0b10000000, 0b00000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b10000000, 0b01000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == true # correct bytes + utf8([0b11110000, 0b10010000, 0b10000000, 0b11000000]).valid_encoding?.should == false + end + + it "is not valid if is smaller than [xxxxx000 xx001000 xx000000 xx000000] (codepoint < U+10000)" do + utf8([0b11110000, 0b10000111, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000110, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000101, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000100, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000011, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000010, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000001, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110000, 0b10000000, 0b10000000, 0b10000000]).valid_encoding?.should == false + end + + it "is not valid if is greater than [xxxxx100 xx001111 xx111111 xx111111] (codepoint > U+10FFFF)" do + utf8([0b11110100, 0b10010000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110100, 0b10100000, 0b10000000, 0b10000000]).valid_encoding?.should == false + utf8([0b11110100, 0b10110000, 0b10000000, 0b10000000]).valid_encoding?.should == false + + utf8([0b11110101, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false + utf8([0b11110110, 0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false + utf8([0b11110111, 
0b10001111, 0b10111111, 0b10111111]).valid_encoding?.should == false + end + + it "is not valid if the first byte is missing" do + bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000] + utf8(bytes[1..3]).valid_encoding?.should == false + end + + it "is not valid if the second byte is missing" do + bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000] + utf8([bytes[0], bytes[2], bytes[3]]).valid_encoding?.should == false + end + + it "is not valid if the second and the third bytes are missing" do + bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000] + utf8([bytes[0], bytes[3]]).valid_encoding?.should == false + end + + it "is not valid if the second, the third and the fourth bytes are missing" do + bytes = [0b11110000, 0b10010000, 0b10000000, 0b10000000] + utf8(bytes[0..0]).valid_encoding?.should == false + end + end +end diff --git a/spec/ruby/core/string/valid_encoding_spec.rb b/spec/ruby/core/string/valid_encoding_spec.rb new file mode 100644 index 0000000000..375035cd94 --- /dev/null +++ b/spec/ruby/core/string/valid_encoding_spec.rb @@ -0,0 +1,133 @@ +require_relative '../../spec_helper' + +describe "String#valid_encoding?" 
do + it "returns true if the String's encoding is valid" do + "a".valid_encoding?.should be_true + "\u{8365}\u{221}".valid_encoding?.should be_true + end + + it "returns true if self is valid in the current encoding and other encodings" do + str = +"\x77" + str.force_encoding('utf-8').valid_encoding?.should be_true + str.force_encoding('binary').valid_encoding?.should be_true + end + + it "returns true for all encodings self is valid in" do + str = +"\xE6\x9D\x94" + str.force_encoding('BINARY').valid_encoding?.should be_true + str.force_encoding('UTF-8').valid_encoding?.should be_true + str.force_encoding('US-ASCII').valid_encoding?.should be_false + str.force_encoding('Big5').valid_encoding?.should be_false + str.force_encoding('CP949').valid_encoding?.should be_false + str.force_encoding('Emacs-Mule').valid_encoding?.should be_false + str.force_encoding('EUC-JP').valid_encoding?.should be_false + str.force_encoding('EUC-KR').valid_encoding?.should be_false + str.force_encoding('EUC-TW').valid_encoding?.should be_false + str.force_encoding('GB18030').valid_encoding?.should be_false + str.force_encoding('GBK').valid_encoding?.should be_false + str.force_encoding('ISO-8859-1').valid_encoding?.should be_true + str.force_encoding('ISO-8859-2').valid_encoding?.should be_true + str.force_encoding('ISO-8859-3').valid_encoding?.should be_true + str.force_encoding('ISO-8859-4').valid_encoding?.should be_true + str.force_encoding('ISO-8859-5').valid_encoding?.should be_true + str.force_encoding('ISO-8859-6').valid_encoding?.should be_true + str.force_encoding('ISO-8859-7').valid_encoding?.should be_true + str.force_encoding('ISO-8859-8').valid_encoding?.should be_true + str.force_encoding('ISO-8859-9').valid_encoding?.should be_true + str.force_encoding('ISO-8859-10').valid_encoding?.should be_true + str.force_encoding('ISO-8859-11').valid_encoding?.should be_true + str.force_encoding('ISO-8859-13').valid_encoding?.should be_true + 
str.force_encoding('ISO-8859-14').valid_encoding?.should be_true + str.force_encoding('ISO-8859-15').valid_encoding?.should be_true + str.force_encoding('ISO-8859-16').valid_encoding?.should be_true + str.force_encoding('KOI8-R').valid_encoding?.should be_true + str.force_encoding('KOI8-U').valid_encoding?.should be_true + str.force_encoding('Shift_JIS').valid_encoding?.should be_false + "\xD8\x00".dup.force_encoding('UTF-16BE').valid_encoding?.should be_false + "\x00\xD8".dup.force_encoding('UTF-16LE').valid_encoding?.should be_false + "\x04\x03\x02\x01".dup.force_encoding('UTF-32BE').valid_encoding?.should be_false + "\x01\x02\x03\x04".dup.force_encoding('UTF-32LE').valid_encoding?.should be_false + str.force_encoding('Windows-1251').valid_encoding?.should be_true + str.force_encoding('IBM437').valid_encoding?.should be_true + str.force_encoding('IBM737').valid_encoding?.should be_true + str.force_encoding('IBM775').valid_encoding?.should be_true + str.force_encoding('CP850').valid_encoding?.should be_true + str.force_encoding('IBM852').valid_encoding?.should be_true + str.force_encoding('CP852').valid_encoding?.should be_true + str.force_encoding('IBM855').valid_encoding?.should be_true + str.force_encoding('CP855').valid_encoding?.should be_true + str.force_encoding('IBM857').valid_encoding?.should be_true + str.force_encoding('IBM860').valid_encoding?.should be_true + str.force_encoding('IBM861').valid_encoding?.should be_true + str.force_encoding('IBM862').valid_encoding?.should be_true + str.force_encoding('IBM863').valid_encoding?.should be_true + str.force_encoding('IBM864').valid_encoding?.should be_true + str.force_encoding('IBM865').valid_encoding?.should be_true + str.force_encoding('IBM866').valid_encoding?.should be_true + str.force_encoding('IBM869').valid_encoding?.should be_true + str.force_encoding('Windows-1258').valid_encoding?.should be_true + str.force_encoding('GB1988').valid_encoding?.should be_true + 
str.force_encoding('macCentEuro').valid_encoding?.should be_true + str.force_encoding('macCroatian').valid_encoding?.should be_true + str.force_encoding('macCyrillic').valid_encoding?.should be_true + str.force_encoding('macGreek').valid_encoding?.should be_true + str.force_encoding('macIceland').valid_encoding?.should be_true + str.force_encoding('macRoman').valid_encoding?.should be_true + str.force_encoding('macRomania').valid_encoding?.should be_true + str.force_encoding('macThai').valid_encoding?.should be_true + str.force_encoding('macTurkish').valid_encoding?.should be_true + str.force_encoding('macUkraine').valid_encoding?.should be_true + str.force_encoding('stateless-ISO-2022-JP').valid_encoding?.should be_false + str.force_encoding('eucJP-ms').valid_encoding?.should be_false + str.force_encoding('CP51932').valid_encoding?.should be_false + str.force_encoding('GB2312').valid_encoding?.should be_false + str.force_encoding('GB12345').valid_encoding?.should be_false + str.force_encoding('ISO-2022-JP').valid_encoding?.should be_true + str.force_encoding('ISO-2022-JP-2').valid_encoding?.should be_true + str.force_encoding('CP50221').valid_encoding?.should be_true + str.force_encoding('Windows-1252').valid_encoding?.should be_true + str.force_encoding('Windows-1250').valid_encoding?.should be_true + str.force_encoding('Windows-1256').valid_encoding?.should be_true + str.force_encoding('Windows-1253').valid_encoding?.should be_true + str.force_encoding('Windows-1255').valid_encoding?.should be_true + str.force_encoding('Windows-1254').valid_encoding?.should be_true + str.force_encoding('TIS-620').valid_encoding?.should be_true + str.force_encoding('Windows-874').valid_encoding?.should be_true + str.force_encoding('Windows-1257').valid_encoding?.should be_true + str.force_encoding('Windows-31J').valid_encoding?.should be_false + str.force_encoding('MacJapanese').valid_encoding?.should be_false + str.force_encoding('UTF-7').valid_encoding?.should be_true + 
str.force_encoding('UTF8-MAC').valid_encoding?.should be_true + end + + it "returns true for IBM720 encoding self is valid in" do + str = +"\xE6\x9D\x94" + str.force_encoding('IBM720').valid_encoding?.should be_true + str.force_encoding('CP720').valid_encoding?.should be_true + end + + it "returns false if self is valid in one encoding, but invalid in the one it's tagged with" do + str = +"\u{8765}" + str.valid_encoding?.should be_true + str.force_encoding('ascii') + str.valid_encoding?.should be_false + end + + it "returns false if self contains a character invalid in the associated encoding" do + "abc#{[0x80].pack('C')}".dup.force_encoding('ascii').valid_encoding?.should be_false + end + + it "returns false if a valid String had an invalid character appended to it" do + str = +"a" + str.valid_encoding?.should be_true + str << [0xDD].pack('C').force_encoding('utf-8') + str.valid_encoding?.should be_false + end + + it "returns true if an invalid string is appended another invalid one but both make a valid string" do + str = [0xD0].pack('C').force_encoding('utf-8') + str.valid_encoding?.should be_false + str << [0xBF].pack('C').force_encoding('utf-8') + str.valid_encoding?.should be_true + end +end |
